diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..134666a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/glusterfs-6.0.tar.gz diff --git a/.glusterfs.metadata b/.glusterfs.metadata new file mode 100644 index 0000000..98d5fc3 --- /dev/null +++ b/.glusterfs.metadata @@ -0,0 +1 @@ +c9d75f37e00502a10f64cd4ba9aafb17552e0800 SOURCES/glusterfs-6.0.tar.gz diff --git a/SOURCES/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch b/SOURCES/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch new file mode 100644 index 0000000..9ca880d --- /dev/null +++ b/SOURCES/0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch @@ -0,0 +1,1557 @@ +From 78060c16f88594b3424e512a9ef0e4a8f56e88c3 Mon Sep 17 00:00:00 2001 +From: Kaushal M <kmadappa@redhat.com> +Date: Thu, 6 Dec 2018 15:04:16 +0530 +Subject: [PATCH 02/52] glusterd: fix op-versions for RHS backwards + compatability + +Backport of https://code.engineering.redhat.com/gerrit/#/c/60485/ + +This change fixes the op-version of different features and checks to maintain +backwards compatability with RHS-3.0 and before. + +Label: DOWNSTREAM ONLY + +Change-Id: Icb282444da179b12fbd6ed9f491514602f1a38c2 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/70348 +--- + libglusterfs/src/glusterfs/globals.h | 45 +++-- + rpc/rpc-transport/socket/src/socket.c | 4 +- + xlators/cluster/dht/src/dht-shared.c | 6 +- + xlators/debug/io-stats/src/io-stats.c | 16 +- + xlators/features/barrier/src/barrier.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 8 +- + xlators/mgmt/glusterd/src/glusterd-handler.c | 14 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 +- + xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 8 +- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-sm.c | 2 +- + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 12 +- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-store.c | 27 +-- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 192 +++++++++++---------- + xlators/protocol/client/src/client.c | 4 +- + xlators/protocol/server/src/server.c | 6 +- + xlators/storage/posix/src/posix-common.c | 4 +- + 24 files changed, 214 insertions(+), 197 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 8d898c3..b9da872 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -23,23 +23,28 @@ + #define GF_AVOID_OVERWRITE "glusterfs.avoid.overwrite" + #define GF_CLEAN_WRITE_PROTECTION "glusterfs.clean.writexattr" + +-/* Gluster versions - OP-VERSION mapping ++/* RHS versions - OP-VERSION mapping + * +- * 3.3.x - 1 +- * 3.4.x - 2 +- * 3.5.0 - 3 +- * 3.5.1 - 30501 +- * 3.6.0 - 30600 +- * 3.7.0 - 30700 +- * 3.7.1 - 30701 +- * 3.7.2 - 30702 ++ * RHS-2.0 Z - 1 ++ * RHS-2.1 Z - 2 ++ * RHS-2.1 u5 - 20105 ++ * RHS-3.0 - 30000 ++ * RHS-3.0.4 - 30004 ++ * RHGS-3.1 - 30702 + * +- * Starting with Gluster v3.6, the op-version will be multi-digit integer values +- * based on the Glusterfs version, instead of a simply incrementing integer +- * value. 
The op-version for a given X.Y.Z release will be an integer XYZ, with +- * Y and Z 2 digit always 2 digits wide and padded with 0 when needed. This +- * should allow for some gaps between two Y releases for backports of features +- * in Z releases. ++ * ++ * NOTE: ++ * Starting with RHS-3.0, the op-version will be multi-digit integer values ++ * based on the RHS version, instead of a simply incrementing integer value. The ++ * op-version for a given RHS X(Major).Y(Minor).Z(Update) release will be an ++ * integer with digits XYZ. The Y and Z values will be 2 digits wide always ++ * padded with 0 as needed. This should allow for some gaps between two Y ++ * releases for backports of features in Z releases. ++ * ++ * NOTE: ++ * Starting with RHGS-3.1, the op-version will be the same as the upstream ++ * GlusterFS op-versions. This is to allow proper access to upstream clients of ++ * version 3.7.x or greater, proper access to the RHGS volumes. + */ + #define GD_OP_VERSION_MIN \ + 1 /* MIN is the fresh start op-version, mostly \ +@@ -51,7 +56,13 @@ + introduction of newer \ + versions */ + +-#define GD_OP_VERSION_3_6_0 30600 /* Op-Version for GlusterFS 3.6.0 */ ++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */ ++ ++#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_RHS_3_0 ++ ++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */ ++ ++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */ + + #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */ + +@@ -115,8 +126,6 @@ + + #define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ + +-#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0 +- + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index fa0e0f2..121d46b 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4704,7 +4704,7 @@ struct volume_options options[] = { + .description = "SSL CA list. Ignored if SSL is not enabled."}, + {.key = {"ssl-cert-depth"}, + .type = GF_OPTION_TYPE_INT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Maximum certificate-chain depth. If zero, the " + "peer's certificate itself must be in the local " +@@ -4713,7 +4713,7 @@ struct volume_options options[] = { + "local list. Ignored if SSL is not enabled."}, + {.key = {"ssl-cipher-list"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "Allowed SSL ciphers. Ignored if SSL is not enabled."}, + {.key = {"ssl-dh-param"}, +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index c7ef2f1..ea4b7c6 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -1064,7 +1064,7 @@ struct volume_options dht_options[] = { + "When enabled, files will be allocated to bricks " + "with a probability proportional to their size. Otherwise, all " + "bricks will have the same probability (legacy behavior).", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + +@@ -1161,7 +1161,7 @@ struct volume_options dht_options[] = { + "from which hash ranges are allocated starting with 0. 
" + "Note that we still use a directory/file's name to determine the " + "subvolume to which it hashes", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + }, + + {.key = {"rebal-throttle"}, +@@ -1174,7 +1174,7 @@ struct volume_options dht_options[] = { + "migrated at a time. Lazy will allow only one file to " + "be migrated at a time and aggressive will allow " + "max of [($(processing units) - 4) / 2), 4]", +- .op_version = {GD_OP_VERSION_3_7_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .level = OPT_STATUS_BASIC, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC + +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index f12191f..41b57c5 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -4333,7 +4333,7 @@ struct volume_options options[] = { + .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}}, + {.key = {"client-logger"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOGGER_GLUSTER_LOG, +@@ -4342,7 +4342,7 @@ struct volume_options options[] = { + .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}}, + {.key = {"brick-logger"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOGGER_GLUSTER_LOG, +@@ -4354,7 +4354,7 @@ struct volume_options options[] = { + .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}}, + {.key = {"client-log-format"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOG_FORMAT_WITH_MSG_ID, +@@ -4362,7 +4362,7 @@ struct volume_options options[] = { + .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}}, + {.key = {"brick-log-format"}, + .type = GF_OPTION_TYPE_STR, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .default_value = GF_LOG_FORMAT_WITH_MSG_ID, +@@ -4377,7 +4377,7 @@ struct volume_options options[] = { + }, + {.key = {"client-log-buf-size"}, + .type = GF_OPTION_TYPE_INT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_LRU_BUFSIZE_MIN, +@@ -4388,7 +4388,7 @@ struct volume_options options[] = { + " the value of the option client-log-flush-timeout."}, + {.key = {"brick-log-buf-size"}, + .type = GF_OPTION_TYPE_INT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_LRU_BUFSIZE_MIN, +@@ -4406,7 +4406,7 @@ struct volume_options options[] = { + }, + {.key = {"client-log-flush-timeout"}, + .type = GF_OPTION_TYPE_TIME, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_FLUSH_TIMEOUT_MIN, +@@ -4417,7 +4417,7 @@ struct volume_options options[] = { + " the value of the option client-log-flush-timeout."}, + {.key = 
{"brick-log-flush-timeout"}, + .type = GF_OPTION_TYPE_TIME, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .tags = {"io-stats"}, + .min = GF_LOG_FLUSH_TIMEOUT_MIN, +diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c +index a601c7f..0923992 100644 +--- a/xlators/features/barrier/src/barrier.c ++++ b/xlators/features/barrier/src/barrier.c +@@ -774,7 +774,7 @@ struct volume_options options[] = { + {.key = {"barrier"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "disable", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "When \"enabled\", blocks acknowledgements to application " + "for file operations such as rmdir, rename, unlink, " +@@ -784,7 +784,7 @@ struct volume_options options[] = { + {.key = {"barrier-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = BARRIER_TIMEOUT, +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE, + .description = "After 'timeout' seconds since the time 'barrier' " + "option was set to \"on\", acknowledgements to file " +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index 38483a1..ad9a572 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -1195,7 +1195,7 @@ glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count, + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + brick_mount_dir = NULL; + + snprintf(key, sizeof(key), "brick%d.mount_dir", i); +@@ -1729,7 +1729,7 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = glusterd_get_brick_mount_dir( + brickinfo->path, brickinfo->hostname, brickinfo->mount_dir); + if (ret) { +@@ -2085,12 +2085,12 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + } + + /* Check if the connected clients are all of version +- * glusterfs-3.6 and higher. This is needed to prevent some data ++ * RHS-2.1u5 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. 
+ */ + ret = glusterd_check_client_op_version_support( +- volname, GD_OP_VERSION_3_6_0, NULL); ++ volname, GD_OP_VERSION_RHS_2_1_5, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index f754b52..387643d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -763,7 +763,7 @@ glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + } + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + ret = glusterd_lock(MY_UUID); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL, +@@ -818,7 +818,7 @@ glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + local_locking_done: + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. */ +- if (volname || (priv->op_version < GD_OP_VERSION_3_6_0)) ++ if (volname || (priv->op_version < GD_OP_VERSION_RHS_3_0)) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; +@@ -849,7 +849,7 @@ out: + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) + glusterd_unlock(MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol"); +@@ -4432,12 +4432,12 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req) + } + + if ((cmd & GF_CLI_STATUS_SNAPD) && +- (conf->op_version < GD_OP_VERSION_3_6_0)) { ++ (conf->op_version < GD_OP_VERSION_RHS_3_0)) { + snprintf(err_str, sizeof(err_str), + "The cluster is operating " + "at a lesser version than %d. Getting the status of " + "snapd is not allowed in this state", +- GD_OP_VERSION_3_6_0); ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +@@ -4459,7 +4459,7 @@ __glusterd_handle_status_volume(rpcsvc_request_t *req) + "The cluster is operating " + "at a lesser version than %d. 
Getting the status of " + "tierd is not allowed in this state", +- GD_OP_VERSION_3_6_0); ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +@@ -6430,7 +6430,7 @@ __glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_friend_sm_state_name_get(peerinfo->state.state)); + + if (peerinfo->connected) { +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + glusterd_get_lock_owner(&uuid); + if (!gf_uuid_is_null(uuid) && + !gf_uuid_compare(peerinfo->uuid, uuid)) +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 6495a9d..dd3f9eb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -194,7 +194,7 @@ glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id) + if (!*txn_id) + goto out; + +- if (priv->op_version < GD_OP_VERSION_3_6_0) ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) + gf_uuid_copy(**txn_id, priv->global_txn_id); + else + gf_uuid_generate(**txn_id); +@@ -1864,12 +1864,12 @@ glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr) + } + + if ((cmd & GF_CLI_STATUS_SNAPD) && +- (priv->op_version < GD_OP_VERSION_3_6_0)) { ++ (priv->op_version < GD_OP_VERSION_RHS_3_0)) { + snprintf(msg, sizeof(msg), + "The cluster is operating at " + "version less than %d. Getting the " + "status of snapd is not allowed in this state.", +- GD_OP_VERSION_3_6_0); ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +@@ -3877,7 +3877,7 @@ glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx) + continue; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn(NULL, this, peerinfo); +@@ -3980,7 +3980,7 @@ glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx) + continue; + /* Based on the op_version, + * release the cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn(NULL, this, peerinfo); +@@ -4957,7 +4957,7 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx) + count = brick_index_max + other_count + 1; + + /* +- * a glusterd lesser than version 3.7 will be sending the ++ * a glusterd lesser than version RHS-3.0.4 will be sending the + * rdma port in older key. Changing that value from here + * to support backward compatibility + */ +@@ -4977,7 +4977,7 @@ glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx) + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) + goto out; +- if (conf->op_version < GD_OP_VERSION_3_7_0 && ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0_4 && + volinfo->transport_type == GF_TRANSPORT_RDMA) { + ret = glusterd_op_modify_port_key(op_ctx, brick_index_max); + if (ret) +@@ -5576,7 +5576,7 @@ glusterd_op_txn_complete(uuid_t *txn_id) + glusterd_op_clear_errstr(); + + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_RHS_3_0) { + ret = glusterd_unlock(MY_UUID); + /* unlock can't/shouldn't fail here!! 
*/ + if (ret) +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index 5b5959e..f24c86e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -547,7 +547,7 @@ out: + * @prefix. All the parameters are compulsory. + * + * The complete address list is added to the dict only if the cluster op-version +- * is >= GD_OP_VERSION_3_6_0 ++ * is >= GD_OP_VERSION_3_7_0 + */ + int + gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict, +@@ -593,7 +593,7 @@ gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict, + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_7_0) { + ret = 0; + goto out; + } +@@ -778,7 +778,7 @@ gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + GF_FREE(peerinfo->hostname); + peerinfo->hostname = gf_strdup(hostname); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_7_0) { + ret = 0; + goto out; + } +@@ -894,7 +894,7 @@ gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict, + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_7_0) { + ret = 0; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 34b0294..6365b6e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -792,13 +792,13 @@ glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr) + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + /* Check if the connected clients are all of version +- * glusterfs-3.6 and higher. This is needed to prevent some data ++ * RHS-2.1u5 and higher. This is needed to prevent some data + * loss issues that could occur when older clients are connected + * when rebalance is run. 
This check can be bypassed by using + * 'force' + */ + ret = glusterd_check_client_op_version_support( +- volname, GD_OP_VERSION_3_6_0, NULL); ++ volname, GD_OP_VERSION_RHS_2_1_5, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " +diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +index ca1de1a..0615081 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c ++++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +@@ -297,7 +297,7 @@ glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr, + if (ret) + goto out; + +- } else if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ } else if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +@@ -396,7 +396,7 @@ glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick, + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = dict_get_strn(dict, "brick1.mount_dir", SLEN("brick1.mount_dir"), + &brick_mount_dir); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +index 728781d..4ec9700 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +@@ -288,11 +288,11 @@ __glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count, + * we need to add the new hostname to the peer. + * + * This addition should only be done for cluster op-version >= +- * GD_OP_VERSION_3_6_0 as address lists are only supported from then on. ++ * GD_OP_VERSION_3_7_0 as address lists are only supported from then on. + * Also, this update should only be done when an explicit CLI probe + * command was used to begin the probe process. 
+ */ +- if ((conf->op_version >= GD_OP_VERSION_3_6_0) && ++ if ((conf->op_version >= GD_OP_VERSION_3_7_0) && + (gf_uuid_compare(rsp.uuid, peerinfo->uuid) == 0)) { + ctx = ((call_frame_t *)myframe)->local; + /* Presence of ctx->req implies this probe was started by a cli +@@ -1544,7 +1544,7 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) + goto out; + } + +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = glusterd_add_missed_snaps_to_export_dict(peer_data); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 73a11a3..54a7bd1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -955,7 +955,7 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx) + + /* Compare missed_snapshot list with the peer * + * if volume comparison is successful */ +- if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_3_6_0)) { ++ if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_RHS_3_0)) { + ret = glusterd_import_friend_missed_snap_list(ev_ctx->vols); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 1ece374..2958443 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -470,7 +470,7 @@ gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix, + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -547,7 +547,7 @@ gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix, + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -846,7 +846,7 @@ gd_import_new_brick_snap_details(dict_t *dict, char *prefix, + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -903,8 +903,8 @@ out: + * Imports the snapshot details of a volume if required and available + * + * Snapshot details will be imported only if cluster.op_version is greater than +- * or equal to GD_OP_VERSION_3_6_0, the op-version from which volume snapshot is +- * supported. ++ * or equal to GD_OP_VERSION_RHS_3_0, the op-version from which volume snapshot ++ * is supported. 
+ */ + int + gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo, +@@ -928,7 +928,7 @@ gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo, + GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out); + GF_VALIDATE_OR_GOTO(this->name, (volname != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 8f5cd6d..c56be91 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -9345,14 +9345,14 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req) + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + snprintf(err_str, sizeof(err_str), + "Cluster operating version" + " is lesser than the supported version " + "for a snapshot"); + op_errno = EG_OPNOTSUP; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, +- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_3_6_0); ++ "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 7acea05..64447e7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -313,7 +313,7 @@ gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo) + GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -813,7 +813,7 @@ glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo) + GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out); + GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -967,7 +967,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) + goto out; + } + +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count); + ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf); + if (ret) +@@ -2502,7 +2502,7 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) + conf = THIS->private; + GF_ASSERT(volinfo); + +- if (conf->op_version < GD_OP_VERSION_3_6_0) { ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) { + ret = 0; + goto out; + } +@@ -2510,15 +2510,16 @@ glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo) + /* + * This is needed for upgrade situations. Say a volume is created with + * older version of glusterfs and upgraded to a glusterfs version equal +- * to or greater than GD_OP_VERSION_3_6_0. The older glusterd would not +- * have created the snapd.info file related to snapshot daemon for user +- * serviceable snapshots. So as part of upgrade when the new glusterd +- * starts, as part of restore (restoring the volume to be precise), it +- * tries to snapd related info from snapd.info file. But since there was +- * no such file till now, the restore operation fails. 
Thus, to prevent +- * it from happening check whether user serviceable snapshots features +- * is enabled before restoring snapd. If its disabled, then simply +- * exit by returning success (without even checking for the snapd.info). ++ * to or greater than GD_OP_VERSION_RHS_3_0. The older glusterd would ++ * not have created the snapd.info file related to snapshot daemon for ++ * user serviceable snapshots. So as part of upgrade when the new ++ * glusterd starts, as part of restore (restoring the volume to be ++ * precise), it tries to snapd related info from snapd.info file. But ++ * since there was no such file till now, the restore operation fails. ++ * Thus, to prevent it from happening check whether user serviceable ++ * snapshots features is enabled before restoring snapd. If its ++ * disbaled, then simply exit by returning success (without even ++ * checking for the snapd.info). + */ + + if (!dict_get_str_boolean(volinfo->dict, "features.uss", _gf_false)) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 45b221c..1741cf8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1827,7 +1827,7 @@ gd_sync_task_begin(dict_t *op_ctx, rpcsvc_request_t *req) + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_3_6_0) ++ if (conf->op_version < GD_OP_VERSION_RHS_3_0) + cluster_lock = _gf_true; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c +index dd86cf5..4dc0d44 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tier.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c +@@ -867,7 +867,8 @@ glusterd_op_stage_tier(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + * when rebalance is run. 
This check can be bypassed by using + * 'force' + */ +- ret = glusterd_check_client_op_version_support(volname, GD_OP_VERSION_3_6_0, ++ ret = glusterd_check_client_op_version_support(volname, ++ GD_OP_VERSION_RHS_3_0, + NULL); + if (ret) { + ret = gf_asprintf(op_errstr, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 8bbd795..52b83ec 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -12226,10 +12226,10 @@ gd_update_volume_op_versions(glusterd_volinfo_t *volinfo) + } + + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { +- if (volinfo->op_version < GD_OP_VERSION_3_6_0) +- volinfo->op_version = GD_OP_VERSION_3_6_0; +- if (volinfo->client_op_version < GD_OP_VERSION_3_6_0) +- volinfo->client_op_version = GD_OP_VERSION_3_6_0; ++ if (volinfo->op_version < GD_OP_VERSION_3_7_0) ++ volinfo->op_version = GD_OP_VERSION_3_7_0; ++ if (volinfo->client_op_version < GD_OP_VERSION_3_7_0) ++ volinfo->client_op_version = GD_OP_VERSION_3_7_0; + } + + return; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 7cfba3d..86ef470 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1389,7 +1389,7 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + ret = glusterd_get_brick_mount_dir(brick_info->path, + brick_info->hostname, + brick_info->mount_dir); +@@ -1698,7 +1698,7 @@ glusterd_op_stage_start_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + if (strlen(brickinfo->mount_dir) < 1) { + ret = glusterd_get_brick_mount_dir( + brickinfo->path, brickinfo->hostname, brickinfo->mount_dir); +@@ -2395,10 +2395,10 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr) + volname); + goto out; + } +- if (priv->op_version < GD_OP_VERSION_3_6_0) { ++ if (priv->op_version < GD_OP_VERSION_3_7_0) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, + "Disperse volume " +- "needs op-version 3.6.0 or higher"); ++ "needs op-version 30700 or higher"); + ret = -1; + goto out; + } +@@ -2494,7 +2494,7 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (priv->op_version >= GD_OP_VERSION_3_6_0) { ++ if (priv->op_version >= GD_OP_VERSION_RHS_3_0) { + brick_mount_dir = NULL; + ret = snprintf(key, sizeof(key), "brick%d.mount_dir", i); + ret = dict_get_strn(dict, key, ret, &brick_mount_dir); +@@ -2703,7 +2703,7 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr) + /* A bricks mount dir is required only by snapshots which were + * introduced in gluster-3.6.0 + */ +- if (conf->op_version >= GD_OP_VERSION_3_6_0) { ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { + brick_count++; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index dc58e11..d07fc10 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -807,7 +807,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "cluster.subvols-per-directory", + .voltype = "cluster/distribute", + .option = "directory-layout-spread", +- .op_version = 2, ++ .op_version = 1, + .validate_fn = validate_subvols_per_directory, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.readdir-optimize", +@@ -817,25 +817,25 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "cluster.rsync-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.extra-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.dht-xattr-name", + .voltype = "cluster/distribute", + .option = "xattr-name", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.randomize-hash-range-by-gfid", + .voltype = "cluster/distribute", + .option = "randomize-hash-range-by-gfid", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT, + }, + { +@@ -877,12 +877,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "cluster/nufa", + .option = "local-volume-name", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "cluster.weighted-rebalance", + .voltype = "cluster/distribute", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + + /* Switch xlator options (Distribute special case) */ +@@ -890,13 +890,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "cluster/distribute", + .option = "!switch", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.switch-pattern", + .voltype = "cluster/switch", + .option = "pattern.switch.case", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* AFR xlator options */ +@@ -1014,16 +1014,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.readdir-failover", + .voltype = "cluster/replicate", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.ensure-durability", + .voltype = "cluster/replicate", +- .op_version = 3, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.consistent-metadata", + .voltype = "cluster/replicate", + .type = DOC, +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0_4, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "cluster.heal-wait-queue-length", + .voltype = "cluster/replicate", +@@ -1080,45 +1080,45 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "diagnostics.brick-logger", + .voltype = "debug/io-stats", + .option = "!logger", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-logger", + .voltype = "debug/io-stats", + .option = "!logger", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", +- .op_version = 
GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-buf-size", + .voltype = "debug/io-stats", + .option = "!log-buf-size", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-log-buf-size", + .voltype = "debug/io-stats", + .option = "!log-buf-size", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "diagnostics.brick-log-flush-timeout", + .voltype = "debug/io-stats", + .option = "!log-flush-timeout", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "diagnostics.client-log-flush-timeout", + .voltype = "debug/io-stats", + .option = "!log-flush-timeout", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "diagnostics.stats-dump-interval", + .voltype = "debug/io-stats", +@@ -1203,6 +1203,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "performance/io-threads", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, ++ {.key = "performance.least-rate-limit", ++ .voltype = "performance/io-threads", ++ .op_version = 1 ++ }, + + /* Other perf xlators' options */ + {.key = "performance.io-cache-pass-through", +@@ -1237,12 +1241,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.nfs.flush-behind", + .voltype = "performance/write-behind", + .option = "flush-behind", +- .op_version = 1, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", +- .op_version = 1, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.resync-failed-syncs-after-fsync", +@@ -1262,27 +1266,27 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.nfs.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", +- .op_version = 1, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", +- .op_version = 2, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.nfs.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", +- .op_version = 2, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.write-behind-trickling-writes", + .voltype = "performance/write-behind", +@@ -1302,12 +1306,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.lazy-open", + .voltype = "performance/open-behind", + .option = "lazy-open", +- .op_version = 3, ++ .op_version = 2, + 
.flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.read-after-open", + .voltype = "performance/open-behind", + .option = "read-after-open", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "performance.open-behind-pass-through", +@@ -1389,22 +1393,22 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "encryption/crypt", + .option = "!feat", + .value = "off", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .description = "enable/disable client-side encryption for " + "the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + + {.key = "encryption.master-key", + .voltype = "encryption/crypt", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "encryption.data-key-size", + .voltype = "encryption/crypt", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "encryption.block-size", + .voltype = "encryption/crypt", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + + /* Client xlator options */ +@@ -1431,7 +1435,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "network.remote-dio", + .voltype = "protocol/client", + .option = "filter-O_DIRECT", +- .op_version = 2, ++ .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "client.own-thread", +@@ -1443,7 +1447,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + { + .key = "client.event-threads", + .voltype = "protocol/client", +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0_4, + }, + {.key = "client.tcp-user-timeout", + .voltype = "protocol/client", +@@ -1501,7 +1505,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "server.root-squash", + .voltype = "protocol/server", + .option = "root-squash", +- .op_version = 2}, ++ .op_version = 1}, + {.key = "server.all-squash", + .voltype = "protocol/server", + .option = "all-squash", +@@ -1509,11 +1513,11 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "server.anonuid", + .voltype = "protocol/server", + .option = "anonuid", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "server.anongid", + .voltype = "protocol/server", + .option = "anongid", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "server.statedump-path", + .voltype = "protocol/server", + .option = "statedump-path", +@@ -1522,7 +1526,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "protocol/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "server.ssl", + .voltype = "protocol/server", + .value = "off", +@@ -1540,12 +1544,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "the clients that are allowed to access the server." 
+ "By default, all TLS authenticated clients are " + "allowed to access the server.", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = "server.manage-gids", + .voltype = "protocol/server", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "server.dynamic-auth", +@@ -1556,12 +1560,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "client.send-gids", + .voltype = "protocol/client", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "server.gid-timeout", + .voltype = "protocol/server", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "server.own-thread", +@@ -1573,7 +1577,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + { + .key = "server.event-threads", + .voltype = "protocol/server", +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0_4, + }, + { + .key = "server.tcp-user-timeout", +@@ -1643,13 +1647,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = SSL_CERT_DEPTH_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-cert-depth", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = SSL_CIPHER_LIST_OPT, + .voltype = "rpc-transport/socket", + .option = "!ssl-cipher-list", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_3_7_0, + }, + { + .key = SSL_DH_PARAM_OPT, +@@ -1690,8 +1694,8 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.readdir-ahead", + .voltype = "performance/readdir-ahead", + .option = "!perf", +- .value = "on", +- .op_version = 3, ++ .value = "off", ++ .op_version = GD_OP_VERSION_RHS_3_0, + .description = "enable/disable readdir-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "performance.io-cache", +@@ -1804,7 +1808,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + /* Feature translators */ + {.key = "features.uss", + .voltype = "features/snapview-server", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .value = "off", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .validate_fn = validate_uss, +@@ -1813,7 +1817,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + + {.key = "features.snapshot-directory", + .voltype = "features/snapview-client", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .value = ".snaps", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .validate_fn = validate_uss_dir, +@@ -1823,7 +1827,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + + {.key = "features.show-snapshot-directory", + .voltype = "features/snapview-client", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .value = "off", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT, + .description = "show entry point in readdir output of " +@@ -1847,30 +1851,30 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/cdc", + .option = "!feat", + .value = "off", +- .op_version = 3, ++ .op_version = GD_OP_VERSION_RHS_3_0, + .description = "enable/disable network compression translator", + .flags = VOLOPT_FLAG_XLATOR_OPT}, + {.key = "network.compression.window-size", + .voltype = "features/cdc", + .option = "window-size", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.mem-level", + .voltype 
= "features/cdc", + .option = "mem-level", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.min-size", + .voltype = "features/cdc", + .option = "min-size", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.compression-level", + .voltype = "features/cdc", + .option = "compression-level", +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "network.compression.debug", + .voltype = "features/cdc", + .option = "debug", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = GD_OP_VERSION_RHS_3_0}, + #endif + + /* Quota xlator options */ +@@ -1886,28 +1890,28 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/quota", + .option = "default-soft-limit", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.soft-timeout", + .voltype = "features/quota", + .option = "soft-timeout", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.hard-timeout", + .voltype = "features/quota", + .option = "hard-timeout", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.alert-time", + .voltype = "features/quota", + .option = "alert-time", + .type = NO_DOC, +- .op_version = 3, ++ .op_version = 2, + }, + { + .key = "features.quota-deem-statfs", +@@ -2009,22 +2013,22 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "debug/error-gen", + .option = "failure", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "debug.error-number", + .voltype = "debug/error-gen", + .option = "error-no", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "debug.random-failure", + .voltype = "debug/error-gen", + .option = "random-failure", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "debug.error-fops", + .voltype = "debug/error-gen", + .option = "enable", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + + /* NFS xlator options */ + {.key = "nfs.enable-ino32", +@@ -2066,7 +2070,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.port", + .voltype = "nfs/server", + .option = "nfs.port", +@@ -2128,7 +2132,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "nfs.acl", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.mount-udp", + .voltype = "nfs/server", + .option = "nfs.mount-udp", +@@ -2144,14 +2148,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "nfs.rpc-statd", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "nfs.log-level", + .voltype = "nfs/server", + .option = "nfs.log-level", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "nfs.server-aux-gids", + .voltype = "nfs/server", +@@ -2162,27 +2166,27 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "nfs/server", + .option = "nfs.drc", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 1}, + {.key = "nfs.drc-size", + .voltype = "nfs/server", + .option = "nfs.drc-size", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 1}, + {.key = "nfs.read-size", + .voltype = "nfs/server", + .option = "nfs3.read-size", + .type = GLOBAL_DOC, +- .op_version = 3}, 
++ .op_version = 2}, + {.key = "nfs.write-size", + .voltype = "nfs/server", + .option = "nfs3.write-size", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.readdir-size", + .voltype = "nfs/server", + .option = "nfs3.readdir-size", + .type = GLOBAL_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "nfs.rdirplus", + .voltype = "nfs/server", + .option = "nfs.rdirplus", +@@ -2219,7 +2223,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "features.read-only", + .voltype = "features/read-only", + .option = "read-only", +- .op_version = 1, ++ .op_version = 2, + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, + {.key = "features.worm", + .voltype = "features/worm", +@@ -2266,14 +2270,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "storage.linux-aio", .voltype = "storage/posix", .op_version = 1}, + {.key = "storage.batch-fsync-mode", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + {.key = "storage.batch-fsync-delay-usec", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + { + .key = "storage.xattr-user-namespace-mode", + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "storage.owner-uid", + .voltype = "storage/posix", +@@ -2285,15 +2289,15 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = 1}, + {.key = "storage.node-uuid-pathinfo", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + {.key = "storage.health-check-interval", + .voltype = "storage/posix", +- .op_version = 3}, ++ .op_version = 2}, + { + .option = "update-link-count-parent", + .key = "storage.build-pgfid", + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .option = "gfid2path", +@@ -2363,7 +2367,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "storage/posix", + .op_version = GD_OP_VERSION_4_1_0, + }, +- {.key = "storage.bd-aio", .voltype = "storage/bd", .op_version = 3}, ++ {.key = "storage.bd-aio", ++ .voltype = "storage/bd", ++ .op_version = GD_OP_VERSION_RHS_3_0}, + {.key = "config.memory-accounting", + .voltype = "mgmt/glusterd", + .option = "!config", +@@ -2385,37 +2391,37 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = GLUSTERD_QUORUM_TYPE_KEY, + .voltype = "mgmt/glusterd", + .value = "off", +- .op_version = 2}, ++ .op_version = 1}, + {.key = GLUSTERD_QUORUM_RATIO_KEY, + .voltype = "mgmt/glusterd", + .value = "0", +- .op_version = 2}, ++ .op_version = 1}, + /* changelog translator - global tunables */ + {.key = "changelog.changelog", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.changelog-dir", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.encoding", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.rollover-time", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + {.key = "changelog.fsync-interval", + .voltype = "features/changelog", + .type = NO_DOC, +- .op_version = 3}, ++ .op_version = 2}, + { + .key = "changelog.changelog-barrier-timeout", + .voltype = "features/changelog", + .value = BARRIER_TIMEOUT, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + {.key = "changelog.capture-del-path", + .voltype = 
"features/changelog", +@@ -2426,18 +2432,18 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/barrier", + .value = "disable", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_3_7_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = "features.barrier-timeout", + .voltype = "features/barrier", + .value = BARRIER_TIMEOUT, +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = GLUSTERD_GLOBAL_OP_VERSION_KEY, + .voltype = "mgmt/glusterd", +- .op_version = GD_OP_VERSION_3_6_0, ++ .op_version = GD_OP_VERSION_RHS_3_0, + }, + { + .key = GLUSTERD_MAX_OP_VERSION_KEY, +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index c8e84f6..dea6c28 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -3002,7 +3002,7 @@ struct volume_options options[] = { + {.key = {"send-gids"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE}, + {.key = {"event-threads"}, + .type = GF_OPTION_TYPE_INT, +@@ -3013,7 +3013,7 @@ struct volume_options options[] = { + "in parallel. Larger values would help process" + " responses faster, depending on available processing" + " power. Range 1-32 threads.", +- .op_version = {GD_OP_VERSION_3_7_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {NULL}}, + }; +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index b4b447b..6ae63ba 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -1854,13 +1854,13 @@ struct volume_options server_options[] = { + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Resolve groups on the server-side.", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"gid-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "300", + .description = "Timeout in seconds for the cached groups to expire.", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"event-threads"}, + .type = GF_OPTION_TYPE_INT, +@@ -1871,7 +1871,7 @@ struct volume_options server_options[] = { + "in parallel. Larger values would help process" + " responses faster, depending on available processing" + " power.", +- .op_version = {GD_OP_VERSION_3_7_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"dynamic-auth"}, + .type = GF_OPTION_TYPE_BOOL, +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index f0d8e3f..ed82e35 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -1243,7 +1243,7 @@ struct volume_options posix_options[] = { + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enable placeholders for gfid to path conversion", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"gfid2path"}, + .type = GF_OPTION_TYPE_BOOL, +@@ -1279,7 +1279,7 @@ struct volume_options posix_options[] = { + " The raw filesystem will not be compatible with OS X Finder.\n" + "\t- Strip: Will strip the user namespace before setting. 
The raw " + "filesystem will work in OS X.\n", +- .op_version = {GD_OP_VERSION_3_6_0}, ++ .op_version = {GD_OP_VERSION_RHS_3_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + #endif + { +-- +1.8.3.1 + diff --git a/SOURCES/0003-rpc-set-bind-insecure-to-off-by-default.patch b/SOURCES/0003-rpc-set-bind-insecure-to-off-by-default.patch new file mode 100644 index 0000000..639b62f --- /dev/null +++ b/SOURCES/0003-rpc-set-bind-insecure-to-off-by-default.patch @@ -0,0 +1,51 @@ +From 9b58731c83bc1ee9c5f2a3cd58a8f845cf09ee82 Mon Sep 17 00:00:00 2001 +From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> +Date: Mon, 21 Mar 2016 13:54:19 +0530 +Subject: [PATCH 03/52] rpc: set bind-insecure to off by default + +commit 243a5b429f225acb8e7132264fe0a0835ff013d5 turn's 'ON' +allow-insecure and bind-insecure by default. + +Problem: +Now with newer versions we have bind-insecure 'ON' by default. +So, while upgrading subset of nodes from a trusted storage pool, +nodes which have older versions of glusterfs will expect +connection from secure ports only (since they still have +bind-insecure off) thus they reject connection from upgraded +nodes which now have insecure ports. + +Hence we will run into connection issues between peers. + +Solution: +This patch will turn bind-insecure 'OFF' by default to avoid +problem explained above. + +Label: DOWNSTREAM ONLY + +Change-Id: Id7a19b4872399d3b019243b0857c9c7af75472f7 +Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/70313 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: Atin Mukherjee <amukherj@redhat.com> +--- + rpc/rpc-lib/src/rpc-transport.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index f9cbdf1..4beaaf9 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -269,8 +269,8 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + else + trans->bind_insecure = 0; + } else { +- /* By default allow bind insecure */ +- trans->bind_insecure = 1; ++ /* Turning off bind insecure by default*/ ++ trans->bind_insecure = 0; + } + + ret = dict_get_str(options, "transport-type", &type); +-- +1.8.3.1 + diff --git a/SOURCES/0004-glusterd-spec-fixing-autogen-issue.patch b/SOURCES/0004-glusterd-spec-fixing-autogen-issue.patch new file mode 100644 index 0000000..f3cb2ec --- /dev/null +++ b/SOURCES/0004-glusterd-spec-fixing-autogen-issue.patch @@ -0,0 +1,47 @@ +From aa73240892a7072be68772370fd95173e6e77d10 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 21 Mar 2016 17:07:00 +0530 +Subject: [PATCH 04/52] glusterd/spec: fixing autogen issue + +Backport of https://code.engineering.redhat.com/gerrit/#/c/59463/ + +Because of the incorrect build section, autogen.sh wasn't re-run during the rpm +build process. The `extras/Makefile.in` was not regenerated with the changes +made to `extras/Makefile.am` in the firewalld patch. This meant that +`extras/Makefile` was generated without the firewalld changes. 
So the firewalld +config wasn't installed during `make install` and rpmbuild later failed when it +failed to find `/usr/lib/firewalld/glusterfs.xml` + +Label: DOWNSTREAM ONLY + +>Reviewed-on: https://code.engineering.redhat.com/gerrit/59463 + +Change-Id: I498bcceeacbd839640282eb6467c9f1464505697 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/70343 +Reviewed-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index c655f16..f5c1f79 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -700,12 +700,7 @@ done + + %build + +-# RHEL6 and earlier need to manually replace config.guess and config.sub +-%if ( 0%{?rhel} && 0%{?rhel} <= 6 ) +-./autogen.sh +-%endif +- +-%configure \ ++./autogen.sh && %configure \ + %{?_with_asan} \ + %{?_with_cmocka} \ + %{?_with_debug} \ +-- +1.8.3.1 + diff --git a/SOURCES/0005-libglusterfs-glusterd-Fix-compilation-errors.patch b/SOURCES/0005-libglusterfs-glusterd-Fix-compilation-errors.patch new file mode 100644 index 0000000..5aa4f20 --- /dev/null +++ b/SOURCES/0005-libglusterfs-glusterd-Fix-compilation-errors.patch @@ -0,0 +1,36 @@ +From 44f758a56c5c5ad340ebc6d6a6478e8712c2c101 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 21 Mar 2016 22:31:02 +0530 +Subject: [PATCH 05/52] libglusterfs/glusterd: Fix compilation errors + +1. Removed duplicate definition of GD_OP_VER_PERSISTENT_AFR_XATTRS introduced in +d367a88 where GD_OP_VER_PERSISTENT_AFR_XATTRS was redfined + +2. Fixed incorrect op-version + +Label: DOWNSTREAM ONLY + +Change-Id: Icfa3206e8a41a11875641f57523732b80837f8f6 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/70384 +Reviewed-by: Nithya Balachandran <nbalacha@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 64447e7..51ca3d1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -967,7 +967,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) + goto out; + } + +- if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { ++ if (conf->op_version >= GD_OP_VERSION_3_7_0) { + snprintf(buf, sizeof(buf), "%d", volinfo->disperse_count); + ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, buf); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0006-build-remove-ghost-directory-entries.patch b/SOURCES/0006-build-remove-ghost-directory-entries.patch new file mode 100644 index 0000000..68dd8f3 --- /dev/null +++ b/SOURCES/0006-build-remove-ghost-directory-entries.patch @@ -0,0 +1,58 @@ +From 1f28e008825ae291208a9e6c714dd642f715a2a1 Mon Sep 17 00:00:00 2001 +From: "Bala.FA" <barumuga@redhat.com> +Date: Mon, 7 Apr 2014 15:24:10 +0530 +Subject: [PATCH 06/52] build: remove ghost directory entries + +ovirt requires hook directories for gluster management and ghost +directories are no more ghost entries + +Label: DOWNSTREAM ONLY + +Change-Id: Iaf1066ba0655619024f87eaaa039f0010578c567 +Signed-off-by: Bala.FA <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/60133 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 19 +++++++++++++++++-- + 1 file changed, 17 
insertions(+), 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index f5c1f79..6be492e 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -799,15 +799,30 @@ install -D -p -m 0644 extras/glusterfs-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs + + %if ( 0%{!?_without_georeplication:1} ) +-# geo-rep ghosts + mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication + touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf + install -D -p -m 0644 extras/glusterfs-georep-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep + %endif + ++%if ( 0%{!?_without_syslog:1} ) ++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) ++install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \ ++ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example ++%endif ++ ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \ ++ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example ++%endif ++ ++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) ++install -D -p -m 0644 extras/logger.conf.example \ ++ %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example ++%endif ++%endif ++ + %if ( 0%{!?_without_server:1} ) +-# the rest of the ghosts + touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info + touch %{buildroot}%{_sharedstatedir}/glusterd/options + subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop) +-- +1.8.3.1 + diff --git a/SOURCES/0007-build-add-RHGS-specific-changes.patch b/SOURCES/0007-build-add-RHGS-specific-changes.patch new file mode 100644 index 0000000..ac092bd --- /dev/null +++ b/SOURCES/0007-build-add-RHGS-specific-changes.patch @@ -0,0 +1,620 @@ +From 7744475550cd27f58f536741e9c50c639d3b02d8 Mon Sep 17 00:00:00 2001 +From: "Bala.FA" <barumuga@redhat.com> +Date: Thu, 6 Dec 2018 20:06:27 +0530 +Subject: [PATCH 07/52] build: add RHGS specific changes + +Label: DOWNSTREAM ONLY + +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1074947 +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1097782 +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1115267 +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1221743 +Change-Id: I08333334745adf2350e772c6454ffcfe9c08cb89 +Reviewed-on: https://code.engineering.redhat.com/gerrit/24983 +Reviewed-on: https://code.engineering.redhat.com/gerrit/25451 +Reviewed-on: https://code.engineering.redhat.com/gerrit/25518 +Reviewed-on: https://code.engineering.redhat.com/gerrit/25983 +Signed-off-by: Bala.FA <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/60134 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 485 +++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 481 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 6be492e..eb04491 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -95,9 +95,16 @@ + %{?_without_server:%global _without_server --without-server} + + # disable server components forcefully as rhel <= 6 +-%if ( 0%{?rhel} && 0%{?rhel} <= 6 ) ++%if ( 0%{?rhel} ) ++%if ( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) ++%global _without_server %{nil} ++%else + %global _without_server --without-server + %endif ++%endif ++ ++%global _without_extra_xlators 1 ++%global _without_regression_tests 1 + + # syslog + # if you wish to build rpms without syslog logging, compile like this +@@ -229,7 +236,8 @@ 
Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} + %else + Name: @PACKAGE_NAME@ + Version: @PACKAGE_VERSION@ +-Release: 0.@PACKAGE_RELEASE@%{?dist} ++Release: @PACKAGE_RELEASE@%{?dist} ++ExcludeArch: i686 + %endif + License: GPLv2 or LGPLv3+ + URL: http://docs.gluster.org/ +@@ -243,8 +251,6 @@ Source8: glusterfsd.init + Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz + %endif + +-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) +- + Requires(pre): shadow-utils + %if ( 0%{?_with_systemd:1} ) + BuildRequires: systemd +@@ -384,7 +390,9 @@ This package provides cloudsync plugins for archival feature. + Summary: Development Libraries + Requires: %{name}%{?_isa} = %{version}-%{release} + # Needed for the Glupy examples to work ++%if ( 0%{!?_without_extra_xlators:1} ) + Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release} ++%endif + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -397,6 +405,7 @@ is in user space and easily manageable. + + This package provides the development libraries and include files. + ++%if ( 0%{!?_without_extra_xlators:1} ) + %package extra-xlators + Summary: Extra Gluster filesystem Translators + # We need python-gluster rpm for gluster module's __init__.py in Python +@@ -415,6 +424,7 @@ is in user space and easily manageable. + + This package provides extra filesystem Translators, such as Glupy, + for GlusterFS. ++%endif + + %package fuse + Summary: Fuse client +@@ -440,6 +450,30 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. + ++%if ( 0%{!?_without_server:1} ) ++%package ganesha ++Summary: NFS-Ganesha configuration ++Group: Applications/File ++ ++Requires: %{name}-server%{?_isa} = %{version}-%{release} ++Requires: nfs-ganesha-gluster, pcs, dbus ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++Requires: cman, pacemaker, corosync ++%endif ++ ++%description ganesha ++GlusterFS is a distributed file-system capable of scaling to several ++petabytes. It aggregates various storage bricks over Infiniband RDMA ++or TCP/IP interconnect into one large parallel network file ++system. GlusterFS is one of the most sophisticated file systems in ++terms of features and extensibility. It borrows a powerful concept ++called Translators from GNU Hurd kernel. Much of the code in GlusterFS ++is in user space and easily manageable. ++ ++This package provides the configuration and related files for using ++NFS-Ganesha as the NFS server using GlusterFS ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + %package geo-replication + Summary: GlusterFS Geo-replication +@@ -541,6 +575,7 @@ is in user space and easily manageable. + This package provides support to ib-verbs library. + %endif + ++%if ( 0%{!?_without_regression_tests:1} ) + %package regression-tests + Summary: Development Tools + Requires: %{name}%{?_isa} = %{version}-%{release} +@@ -556,6 +591,7 @@ Requires: nfs-utils xfsprogs yajl psmisc bc + %description regression-tests + The Gluster Test Framework, is a suite of scripts used for + regression testing of Gluster. 
++%endif + + %if ( 0%{!?_without_ocf:1} ) + %package resource-agents +@@ -1092,6 +1128,16 @@ exit 0 + %if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1} + %{_tmpfilesdir}/gluster.conf + %endif ++%if ( 0%{?_without_extra_xlators:1} ) ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so ++%endif ++%if ( 0%{?_without_regression_tests:1} ) ++%exclude %{_datadir}/glusterfs/run-tests.sh ++%exclude %{_datadir}/glusterfs/tests ++%endif + + %files api + %exclude %{_libdir}/*.so +@@ -1134,12 +1180,14 @@ exit 0 + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so + ++%if ( 0%{!?_without_extra_xlators:1} ) + %files extra-xlators + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so ++%endif + + %files fuse + # glusterfs is a symlink to glusterfsd, -server depends on -fuse. +@@ -1239,11 +1287,13 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma* + %endif + ++%if ( 0%{!?_without_regression_tests:1} ) + %files regression-tests + %dir %{_datadir}/glusterfs + %{_datadir}/glusterfs/run-tests.sh + %{_datadir}/glusterfs/tests + %exclude %{_datadir}/glusterfs/tests/vagrant ++%endif + + %if ( 0%{!?_without_ocf:1} ) + %files resource-agents +@@ -1424,6 +1474,433 @@ exit 0 + %endif + %endif + ++##----------------------------------------------------------------------------- ++## All %pretrans should be placed here and keep them sorted ++## ++%if 0%{!?_without_server:1} ++%pretrans -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ echo "ERROR: Distribute volumes detected. In-service rolling upgrade requires distribute volume(s) to be stopped." ++ echo "ERROR: Please stop distribute volume(s) before proceeding... exiting!" ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ echo "WARNING: Updating glusterfs requires its processes to be killed. This action does NOT incur downtime." ++ echo "WARNING: Ensure to wait for the upgraded server to finish healing before proceeding." ++ echo "WARNING: Refer upgrade section of install guide for more details" ++ echo "Please run # service glusterd stop; pkill glusterfs; pkill glusterfsd; pkill gsyncd.py;" ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs_pretrans_" .. 
os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans api -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-api_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans api-devel -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-api-devel_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans devel -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-devel_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%pretrans fuse -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-fuse_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%if 0%{?_can_georeplicate} ++%if ( 0%{!?_without_georeplication:1} ) ++%pretrans geo-replication -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-geo-replication_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++%endif ++ ++ ++ ++%pretrans libs -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-libs_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ ++%if ( 0%{!?_without_rdma:1} ) ++%pretrans rdma -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++ ++ ++ ++%if ( 0%{!?_without_ocf:1} ) ++%pretrans resource-agents -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++ ++ ++ ++%pretrans server -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++%endif ++ + %changelog + * Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com> + - remove unneeded ldconfig in scriptlets +-- +1.8.3.1 + diff --git a/SOURCES/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch b/SOURCES/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch new file mode 100644 index 0000000..66a39d2 --- /dev/null +++ b/SOURCES/0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch @@ -0,0 +1,35 @@ +From 0ab54c5b274f29fcdd4787325c7183a84e875bbc Mon Sep 17 00:00:00 2001 +From: "Bala.FA" <barumuga@redhat.com> +Date: Thu, 22 May 2014 08:37:27 +0530 +Subject: [PATCH 08/52] secalert: remove setuid bit for fusermount-glusterfs + +glusterfs-fuse: File /usr/bin/fusermount-glusterfs on x86_64 is setuid +root but is not on the setxid whitelist + +Label: DOWNSTREAM ONLY + +Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=989480 +Change-Id: Icf6e5db72ae15ccc60b02be6713fb6c4f4c8a15f +Signed-off-by: Bala.FA <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/25453 +Signed-off-by: Bala.FA <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/60135 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + contrib/fuse-util/Makefile.am | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am +index abbc10e..a071c81 100644 +--- a/contrib/fuse-util/Makefile.am ++++ b/contrib/fuse-util/Makefile.am +@@ -9,6 +9,5 @@ AM_CFLAGS = -Wall $(GF_CFLAGS) + + install-exec-hook: + -chown root $(DESTDIR)$(bindir)/fusermount-glusterfs +- chmod u+s $(DESTDIR)$(bindir)/fusermount-glusterfs + + CLEANFILES = +-- +1.8.3.1 + diff --git a/SOURCES/0009-build-introduce-security-hardening-flags-in-gluster.patch b/SOURCES/0009-build-introduce-security-hardening-flags-in-gluster.patch new file mode 100644 index 0000000..7cfe937 --- /dev/null +++ b/SOURCES/0009-build-introduce-security-hardening-flags-in-gluster.patch @@ -0,0 +1,57 @@ +From 2adb5d540e9344149ae2591811ad34928775e6fd Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Wed, 3 Jun 2015 11:09:21 +0530 +Subject: [PATCH 09/52] build: introduce security hardening flags in gluster + +This patch introduces two of the security hardening compiler flags RELRO & PIE +in gluster codebase. Using _hardened_build as 1 doesn't guarantee the existance +of these flags in the compilation as different versions of RHEL have different +redhat-rpm-config macro. So the idea is to export these flags at spec file +level. 
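A quick way to see what these two hardening flags buy, outside the build system: the sketch below is illustrative only (not part of the patch) and assumes a reasonably recent gcc on a Linux host with ASLR enabled. A binary built with -fPIE/-pie is loaded at a randomized base address, so the address of main() changes on every run, while a non-PIE build prints the same address each time.

/* pie_demo.c -- illustrative sketch, not part of this patch.
 * Build PIE:     gcc -fPIE -pie -Wl,-z,relro,-z,now pie_demo.c -o pie_demo
 * Build non-PIE: gcc -no-pie pie_demo.c -o nopie_demo
 * Run each twice: only the PIE binary prints a different address per run. */
#include <stdio.h>

int main(void)
{
    /* Under ASLR, a PIE executable's load base is randomized, so this
     * address is a direct indicator of whether -pie took effect. */
    printf("main is loaded at %p\n", (void *)main);
    return 0;
}

Full RELRO can be confirmed on the result with readelf: readelf -l pie_demo should show a GNU_RELRO segment, and readelf -d pie_demo should list the BIND_NOW flag that -z now adds.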
+ +Label: DOWNSTREAM ONLY + +Change-Id: I0a1a56d0a8f54f110d306ba5e55e39b1b073dc84 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/49780 +Reviewed-by: Balamurugan Arumugam <barumuga@redhat.com> +Tested-by: Balamurugan Arumugam <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/60137 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index eb04491..8a31a98 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -736,6 +736,25 @@ done + + %build + ++# In RHEL7 few hardening flags are available by default, however the RELRO ++# default behaviour is partial, convert to full ++%if ( 0%{?rhel} && 0%{?rhel} >= 7 ) ++LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now" ++export LDFLAGS ++%else ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++CFLAGS="$RPM_OPT_FLAGS -fPIE -DPIE" ++LDFLAGS="$RPM_LD_FLAGS -pie -Wl,-z,relro,-z,now" ++%else ++#It appears that with gcc-4.1.2 in RHEL5 there is an issue using both -fPIC and ++ # -fPIE that makes -z relro not work; -fPIE seems to undo what -fPIC does ++CFLAGS="$CFLAGS $RPM_OPT_FLAGS" ++LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now" ++%endif ++export CFLAGS ++export LDFLAGS ++%endif ++ + ./autogen.sh && %configure \ + %{?_with_asan} \ + %{?_with_cmocka} \ +-- +1.8.3.1 + diff --git a/SOURCES/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch b/SOURCES/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch new file mode 100644 index 0000000..9226936 --- /dev/null +++ b/SOURCES/0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch @@ -0,0 +1,100 @@ +From bf5906cbc9bf986c7495db792d098001e28c47e3 Mon Sep 17 00:00:00 2001 +From: Niels de Vos <ndevos@redhat.com> +Date: Wed, 22 Apr 2015 15:39:59 +0200 +Subject: [PATCH 10/52] spec: fix/add pre-transaction scripts for geo-rep and + cli packages + +The cli subpackage never had a %pretrans script, this has been added +now. + +The %pretrans script for ge-repliaction was never included in the RPM +package because it was disable by a undefined macro (_can_georeplicate). +This macro is not used/set anywhere else and _without_georeplication +should take care of it anyway. + +Note: This is a Red Hat Gluster Storage specific patch. Upstream + packaging guidelines do not allow these kind of 'features'. + +Label: DOWNSTREAM ONLY + +Change-Id: I16aab5bba72f1ed178f3bcac47f9d8ef767cfcef +Signed-off-by: Niels de Vos <ndevos@redhat.com> +Signed-off-by: Bala.FA <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/50491 +Reviewed-on: https://code.engineering.redhat.com/gerrit/60138 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 43 +++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 41 insertions(+), 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 8a31a98..b70dbfc 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1626,6 +1626,47 @@ end + + + ++%pretrans cli -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . 
> /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-cli_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ ++ ++ + %pretrans devel -p <lua> + if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd +@@ -1708,7 +1749,6 @@ end + + + +-%if 0%{?_can_georeplicate} + %if ( 0%{!?_without_georeplication:1} ) + %pretrans geo-replication -p <lua> + if not posix.access("/bin/bash", "x") then +@@ -1749,7 +1789,6 @@ if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end + %endif +-%endif + + + +-- +1.8.3.1 + diff --git a/SOURCES/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch b/SOURCES/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch new file mode 100644 index 0000000..cc79317 --- /dev/null +++ b/SOURCES/0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch @@ -0,0 +1,138 @@ +From 40eb62a8872ce061416e899fb6c0784b6253ab16 Mon Sep 17 00:00:00 2001 +From: Niels de Vos <ndevos@redhat.com> +Date: Fri, 7 Dec 2018 14:05:21 +0530 +Subject: [PATCH 11/52] rpm: glusterfs-devel for client-builds should not + depend on -server + +glusterfs-devel for client-side packages should *not* include the +libgfdb.so symlink and libgfdb.pc file or any of the libchangelog +ones. + +Label: DOWNSTREAM ONLY + +Change-Id: Ifb4a9cf48841e5af5dd0a98b6de51e2ee469fc56 +Signed-off-by: Niels de Vos <ndevos@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/51019 +Reviewed-by: Balamurugan Arumugam <barumuga@redhat.com> +Tested-by: Balamurugan Arumugam <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/60139 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 86 +++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 62 insertions(+), 24 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index b70dbfc..1c631db 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -450,30 +450,6 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. + +-%if ( 0%{!?_without_server:1} ) +-%package ganesha +-Summary: NFS-Ganesha configuration +-Group: Applications/File +- +-Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: nfs-ganesha-gluster, pcs, dbus +-%if ( 0%{?rhel} && 0%{?rhel} == 6 ) +-Requires: cman, pacemaker, corosync +-%endif +- +-%description ganesha +-GlusterFS is a distributed file-system capable of scaling to several +-petabytes. It aggregates various storage bricks over Infiniband RDMA +-or TCP/IP interconnect into one large parallel network file +-system. GlusterFS is one of the most sophisticated file systems in +-terms of features and extensibility. It borrows a powerful concept +-called Translators from GNU Hurd kernel. 
Much of the code in GlusterFS +-is in user space and easily manageable. +- +-This package provides the configuration and related files for using +-NFS-Ganesha as the NFS server using GlusterFS +-%endif +- + %if ( 0%{!?_without_georeplication:1} ) + %package geo-replication + Summary: GlusterFS Geo-replication +@@ -1157,6 +1133,62 @@ exit 0 + %exclude %{_datadir}/glusterfs/run-tests.sh + %exclude %{_datadir}/glusterfs/tests + %endif ++%if 0%{?_without_server:1} ++%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-5.8.conf ++%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-7.2.conf ++%exclude %{_sysconfdir}/glusterfs/glusterd.vol ++%exclude %{_sysconfdir}/glusterfs/glusterfs-georep-logrotate ++%exclude %{_sysconfdir}/glusterfs/glusterfs-logrotate ++%exclude %{_sysconfdir}/glusterfs/group-db-workload ++%exclude %{_sysconfdir}/glusterfs/group-distributed-virt ++%exclude %{_sysconfdir}/glusterfs/group-gluster-block ++%exclude %{_sysconfdir}/glusterfs/group-metadata-cache ++%exclude %{_sysconfdir}/glusterfs/group-nl-cache ++%exclude %{_sysconfdir}/glusterfs/group-virt.example ++%exclude %{_sysconfdir}/glusterfs/logger.conf.example ++%exclude %{_sysconfdir}/rsyslog.d/gluster.conf.example ++%exclude %{_prefix}/bin/glusterfind ++%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml ++%exclude %{_prefix}/lib/systemd/system/glusterd.service ++%exclude %{_prefix}/lib/systemd/system/glusterfssharedstorage.service ++%exclude %{_prefix}/lib/tmpfiles.d/gluster.conf ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix-locks.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quotad.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so ++%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so ++%exclude %{_libexecdir}/glusterfs/* ++%exclude %{_sbindir}/conf.py ++%exclude %{_sbindir}/gcron.py ++%exclude %{_sbindir}/gf_attach ++%exclude %{_sbindir}/gfind_missing_files ++%exclude %{_sbindir}/glfsheal ++%exclude %{_sbindir}/gluster ++%exclude %{_sbindir}/gluster-setgfid2path ++%exclude %{_sbindir}/glusterd ++%exclude %{_sbindir}/snap_scheduler.py ++%exclude 
%{_datadir}/glusterfs/scripts/control-cpu-load.sh ++%exclude %{_datadir}/glusterfs/scripts/control-mem.sh ++%exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh ++%exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh ++%exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh ++%exclude %{_sharedstatedir}/glusterd/* ++%endif + + %files api + %exclude %{_libdir}/*.so +@@ -1190,7 +1222,13 @@ exit 0 + %exclude %{_includedir}/glusterfs/api + %exclude %{_libdir}/libgfapi.so + %{_libdir}/*.so ++%if ( 0%{?_without_server:1} ) ++%exclude %{_libdir}/pkgconfig/libgfchangelog.pc ++%exclude %{_libdir}/libgfchangelog.so ++%else + %{_libdir}/pkgconfig/libgfchangelog.pc ++%{_libdir}/libgfchangelog.so ++%endif + + %files client-xlators + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +-- +1.8.3.1 + diff --git a/SOURCES/0012-build-add-pretrans-check.patch b/SOURCES/0012-build-add-pretrans-check.patch new file mode 100644 index 0000000..efac62f --- /dev/null +++ b/SOURCES/0012-build-add-pretrans-check.patch @@ -0,0 +1,73 @@ +From f054086daf4549a6227196fe37a57a7e49aa5849 Mon Sep 17 00:00:00 2001 +From: "Bala.FA" <barumuga@redhat.com> +Date: Fri, 7 Dec 2018 14:13:40 +0530 +Subject: [PATCH 12/52] build: add pretrans check + +This patch adds pretrans check for client-xlators + +NOTE: ganesha and python-gluster sub-packages are now obsolete + +Label: DOWNSTREAM ONLY + +Change-Id: I454016319832c11902c0ca79a79fbbcf8ac0a121 +Signed-off-by: Bala.FA <barumuga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/50967 +Reviewed-on: https://code.engineering.redhat.com/gerrit/60140 +Tested-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 39 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1c631db..a1ff6e0 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1704,6 +1704,45 @@ if not (ok == 0) then + end + + ++%pretrans client-xlators -p <lua> ++if not posix.access("/bin/bash", "x") then ++ -- initial installation, no shell, no running glusterfsd ++ return 0 ++end ++ ++-- TODO: move this completely to a lua script ++-- For now, we write a temporary bash script and execute that. ++ ++script = [[#!/bin/sh ++pidof -c -o %PPID -x glusterfsd &>/dev/null ++ ++if [ $? -eq 0 ]; then ++ pushd . > /dev/null 2>&1 ++ for volume in /var/lib/glusterd/vols/*; do cd $volume; ++ vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` ++ volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` ++ if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then ++ exit 1; ++ fi ++ done ++ ++ popd > /dev/null 2>&1 ++ exit 1; ++fi ++]] ++ ++-- rpm in RHEL5 does not have os.tmpname() ++-- io.tmpfile() can not be resolved to a filename to pass to bash :-/ ++tmpname = "/tmp/glusterfs-client-xlators_pretrans_" .. os.date("%s") ++tmpfile = io.open(tmpname, "w") ++tmpfile:write(script) ++tmpfile:close() ++ok, how, val = os.execute("/bin/bash " .. 
tmpname) ++os.remove(tmpname) ++if not (ok == 0) then ++ error("Detected running glusterfs processes", ok) ++end ++ + + %pretrans devel -p <lua> + if not posix.access("/bin/bash", "x") then +-- +1.8.3.1 + diff --git a/SOURCES/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch b/SOURCES/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch new file mode 100644 index 0000000..5873f3e --- /dev/null +++ b/SOURCES/0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch @@ -0,0 +1,50 @@ +From 39932e6bbc8de25813387bb1394cc7942b79ef46 Mon Sep 17 00:00:00 2001 +From: anand <anekkunt@redhat.com> +Date: Wed, 18 Nov 2015 16:13:46 +0530 +Subject: [PATCH 13/52] glusterd: fix info file checksum mismatch during + upgrade + +peers are moving rejected state when upgrading from RHS2.1 to RHGS3.1.2 +due to checksum mismatch. + +Label: DOWNSTREAM ONLY + +Change-Id: Ifea6b7dfe8477c7f17eefc5ca87ced58aaa21c84 +Signed-off-by: anand <anekkunt@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/61774 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 51ca3d1..fb52957 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -1009,10 +1009,18 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo) + goto out; + } + +- snprintf(buf, sizeof(buf), "%d", volinfo->op_version); +- ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf); +- if (ret) +- goto out; ++ if (conf->op_version >= GD_OP_VERSION_RHS_3_0) { ++ snprintf (buf, sizeof (buf), "%d", volinfo->op_version); ++ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf); ++ if (ret) ++ goto out; ++ ++ snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version); ++ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, ++ buf); ++ if (ret) ++ goto out; ++ } + + snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version); + ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, +-- +1.8.3.1 + diff --git a/SOURCES/0014-build-spec-file-conflict-resolution.patch b/SOURCES/0014-build-spec-file-conflict-resolution.patch new file mode 100644 index 0000000..fb8aeba --- /dev/null +++ b/SOURCES/0014-build-spec-file-conflict-resolution.patch @@ -0,0 +1,72 @@ +From f76d2370160c50a1f59d08a03a444254c289da60 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Fri, 7 Dec 2018 16:18:07 +0530 +Subject: [PATCH 14/52] build: spec file conflict resolution + +Missed conflict resolution for removing references to +gluster.conf.example as mentioned in patch titled: +packaging: gratuitous dependencies on rsyslog-mm{count,jsonparse} +by Kaleb + +References to hook scripts S31ganesha-start.sh and +S31ganesha-reset.sh got lost in the downstream only +patch conflict resolution. + +Commented blanket reference to %{_sharedsstatedir}/glusterd/* +in section %files server to avoid rpmbuild warning related to +multiple references to hook scripts and other files under +/var/lib/glusterd. 
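Returning to the checksum fix in patch 0013 above: gating the store writes works because peers compare checksums of the same info file, so every peer must serialize exactly the same key set. A minimal self-contained sketch of the idea follows; the names and file layout are hypothetical, simplified from glusterd's actual store code.

/* versioned_store.c -- hypothetical sketch of the patch-0013 idea:
 * keys in the volume info file are emitted only when the cluster-wide
 * op-version says every peer will emit them too; otherwise the peers'
 * info-file checksums diverge and they reject each other on handshake. */
#include <stdio.h>

#define GD_OP_VERSION_RHS_3_0 30000

static int store_save_value(FILE *fp, const char *key, const char *value)
{
    return (fprintf(fp, "%s=%s\n", key, value) < 0) ? -1 : 0;
}

static int write_versioned_keys(FILE *fp, int cluster_op_version,
                                int vol_op_version, int client_op_version)
{
    char buf[32];

    if (cluster_op_version >= GD_OP_VERSION_RHS_3_0) {
        snprintf(buf, sizeof(buf), "%d", vol_op_version);
        if (store_save_value(fp, "op-version", buf))
            return -1;
        snprintf(buf, sizeof(buf), "%d", client_op_version);
        if (store_save_value(fp, "client-op-version", buf))
            return -1;
    }
    /* At op-version 2 (RHS 2.1) the keys are skipped entirely, matching
     * what a not-yet-upgraded peer writes. */
    return 0;
}

int main(void)
{
    puts("# RHS-2.1 cluster (op-version 2): no versioned keys");
    write_versioned_keys(stdout, 2, 2, 2);
    puts("# RHS-3.0 cluster: keys emitted");
    write_versioned_keys(stdout, GD_OP_VERSION_RHS_3_0, 30000, 30000);
    return 0;
}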
+ +Label: DOWNSTREAM ONLY + +Change-Id: I9d409f1595ab985ed9f79d9d4f4298877609ba17 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/70535 +Reviewed-by: Rajesh Joseph <rjoseph@redhat.com> +Tested-by: Rajesh Joseph <rjoseph@redhat.com> +--- + glusterfs.spec.in | 21 +-------------------- + 1 file changed, 1 insertion(+), 20 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index a1ff6e0..8c57f57 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -96,9 +96,7 @@ + + # disable server components forcefully as rhel <= 6 + %if ( 0%{?rhel} ) +-%if ( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) +-%global _without_server %{nil} +-%else ++%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ))) + %global _without_server --without-server + %endif + %endif +@@ -836,23 +834,6 @@ install -D -p -m 0644 extras/glusterfs-georep-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep + %endif + +-%if ( 0%{!?_without_syslog:1} ) +-%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) +-install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \ +- %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example +-%endif +- +-%if ( 0%{?rhel} && 0%{?rhel} == 6 ) +-install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \ +- %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example +-%endif +- +-%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) +-install -D -p -m 0644 extras/logger.conf.example \ +- %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example +-%endif +-%endif +- + %if ( 0%{!?_without_server:1} ) + touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info + touch %{buildroot}%{_sharedstatedir}/glusterd/options +-- +1.8.3.1 + diff --git a/SOURCES/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch b/SOURCES/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch new file mode 100644 index 0000000..b82e19b --- /dev/null +++ b/SOURCES/0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch @@ -0,0 +1,198 @@ +From 3d0e09400dc21dbb5f76fd9ca4bfce3edad0d626 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Fri, 14 Oct 2016 12:53:27 +0530 +Subject: [PATCH 15/52] build: randomize temp file names in pretrans scriptlets + +Security issue CVE-2015-1795 mentions about possibility of file name +spoof attack for the %pretrans server scriptlet. +Since %pretrans scriptlets are executed only for server builds, we can +use os.tmpname() to randomize temporary file names for all %pretrans +scriptlets using this mechanism. + +Label: DOWNSTREAM ONLY + +Change-Id: Ic82433897432794b6d311d836355aa4bad886369 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/86187 +Reviewed-by: Siddharth Sharma <siddharth@redhat.com> +Reviewed-by: Niels de Vos <ndevos@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 84 +++++++++++++++++++++++++++++++------------------------ + 1 file changed, 48 insertions(+), 36 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 8c57f57..3a98822 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1549,9 +1549,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs_pretrans_" .. 
os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1590,9 +1591,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-api_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1631,9 +1633,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-api-devel_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1672,9 +1675,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-cli_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1712,9 +1716,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-client-xlators_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1752,9 +1757,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-devel_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1793,9 +1799,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-fuse_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1835,9 +1842,10 @@ if [ $? 
-eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-geo-replication_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1877,9 +1885,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-libs_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1919,9 +1928,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-rdma_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -1962,9 +1972,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-resource-agents_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +@@ -2004,9 +2015,10 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- rpm in RHEL5 does not have os.tmpname() +--- io.tmpfile() can not be resolved to a filename to pass to bash :-/ +-tmpname = "/tmp/glusterfs-server_pretrans_" .. os.date("%s") ++-- Since we run pretrans scripts only for RPMs built for a server build, ++-- we can now use os.tmpname() since it is available on RHEL6 and later ++-- platforms which are server platforms. ++tmpname = os.tmpname() + tmpfile = io.open(tmpname, "w") + tmpfile:write(script) + tmpfile:close() +-- +1.8.3.1 + diff --git a/SOURCES/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch b/SOURCES/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch new file mode 100644 index 0000000..402b835 --- /dev/null +++ b/SOURCES/0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch @@ -0,0 +1,42 @@ +From c283f15ac9bfb1c98ce95ed0000ebed81cd3b318 Mon Sep 17 00:00:00 2001 +From: Poornima G <pgurusid@redhat.com> +Date: Wed, 26 Apr 2017 14:07:58 +0530 +Subject: [PATCH 16/52] glusterd, parallel-readdir: Change the op-version of + parallel-readdir to 31100 + +Issue: Downstream 3.2 was released with op-version 31001, parallel-readdir +feature in upstream was released in 3.10 and hence with op-version 31000. +With this, parallel-readdir will be allowed in 3.2 cluster/clients as well. +But 3.2 didn't have parallel-readdir feature backported. 
+ +Fix: +Increase the op-version of parallel-readdir feature only in downstream +to 31100(3.3 highest op-version) + +Label: DOWNSTREAM ONLY + +Change-Id: I2640520985627f3a1cb4fb96e28350f8bb9b146c +Signed-off-by: Poornima G <pgurusid@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/104403 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index d07fc10..a31ecda 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2718,7 +2718,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .option = "parallel-readdir", + .value = "off", + .type = DOC, +- .op_version = GD_OP_VERSION_3_10_0, ++ .op_version = GD_OP_VERSION_3_11_0, + .validate_fn = validate_parallel_readdir, + .description = "If this option is enabled, the readdir operation " + "is performed in parallel on all the bricks, thus " +-- +1.8.3.1 + diff --git a/SOURCES/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch b/SOURCES/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch new file mode 100644 index 0000000..f536c9c --- /dev/null +++ b/SOURCES/0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch @@ -0,0 +1,37 @@ +From 5d3315a53611f23a69f88bc8266448e258e2e10f Mon Sep 17 00:00:00 2001 +From: Samikshan Bairagya <sbairagy@redhat.com> +Date: Mon, 10 Jul 2017 11:54:52 +0530 +Subject: [PATCH 17/52] glusterd: Revert op-version for + "cluster.max-brick-per-process" + +The op-version for the "cluster.max-brick-per-process" option was +set to 3.12.0 in the upstream patch and was backported here: +https://code.engineering.redhat.com/gerrit/#/c/111799. This commit +reverts the op-version for this option to 3.11.1 instead. 
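+
+For illustration only (option name as in the subject above; it is a
+global option, so it is set on the special volume name "all"; 250
+mirrors the shipped default and is used here as an example value),
+the option becomes settable once the cluster runs at op-version
+31101 or higher:
+
+    gluster volume set all cluster.op-version 31101
+    gluster volume set all cluster.max-brick-per-process 250
+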
+ +Label: DOWNSTREAM ONLY + +Change-Id: I23639cef43d41915eea0394d019b1e0796a99d7b +Signed-off-by: Samikshan Bairagya <sbairagy@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/111804 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index a31ecda..9a6fe9f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2794,7 +2794,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, +- .op_version = GD_OP_VERSION_3_12_0, ++ .op_version = GD_OP_VERSION_3_11_1, + .validate_fn = validate_mux_limit, + .type = GLOBAL_DOC, + .description = "This option can be used to limit the number of brick " +-- +1.8.3.1 + diff --git a/SOURCES/0018-cli-Add-message-for-user-before-modifying-brick-mult.patch b/SOURCES/0018-cli-Add-message-for-user-before-modifying-brick-mult.patch new file mode 100644 index 0000000..ab3530e --- /dev/null +++ b/SOURCES/0018-cli-Add-message-for-user-before-modifying-brick-mult.patch @@ -0,0 +1,56 @@ +From 539626a64e5b8cfe05d42f5398073e8a57644073 Mon Sep 17 00:00:00 2001 +From: Samikshan Bairagya <sbairagy@redhat.com> +Date: Wed, 9 Aug 2017 14:32:59 +0530 +Subject: [PATCH 18/52] cli: Add message for user before modifying + brick-multiplex option + +Users should ne notified that brick-multiplexing feature is +supported only for container workloads (CNS/CRS). It should also be +made known to users that it is advisable to either have all volumes +in stopped state or have no bricks running before modifying the +"brick-multiplex" option. This commit makes sure these messages +are displayed to the user before brick-multiplexing is enabled or +disabled. + +Label: DOWNSTREAM ONLY + +Change-Id: Ic40294b26c691ea03185c4d1fce840ef23f95718 +Signed-off-by: Samikshan Bairagya <sbairagy@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/114793 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + cli/src/cli-cmd-parser.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index d9913f6..f148c59 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1698,6 +1698,24 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + } + } + ++ if ((strcmp (key, "cluster.brick-multiplex") == 0)) { ++ question = "Brick-multiplexing is supported only for " ++ "container workloads (CNS/CRS). Also it is " ++ "advised to make sure that either all " ++ "volumes are in stopped state or no bricks " ++ "are running before this option is modified." 
++ "Do you still want to continue?"; ++ ++ answer = cli_cmd_get_confirmation (state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log ("cli", GF_LOG_ERROR, "Operation " ++ "cancelled, exiting"); ++ *op_errstr = gf_strdup ("Aborted by user."); ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = dict_set_int32(dict, "count", wordcount - 3); + + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch b/SOURCES/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch new file mode 100644 index 0000000..e1287c9 --- /dev/null +++ b/SOURCES/0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch @@ -0,0 +1,99 @@ +From 8a3035bf612943694a3cd1c6a857bd009e84f55d Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Tue, 10 Oct 2017 09:58:24 +0530 +Subject: [PATCH 19/52] build: launch glusterd upgrade after all new bits are + installed + +Problem: +glusterd upgrade mode needs new bits from glusterfs-rdma which +optional and causes the dependency graph to break since it is +not tied into glusterfs-server requirements + +Solution: +Run glusterd upgrade mode after all new bits are installed +i.e. in %posttrans server section + +Label: DOWNSTREAM ONLY + +Change-Id: I356e02d0bf0eaaef43c20ce07b388262f63093a4 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/120094 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Raghavendra Talur <rtalur@redhat.com> +--- + glusterfs.spec.in | 51 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 29 insertions(+), 22 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 3a98822..208a82d 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -946,28 +946,6 @@ fi + %firewalld_reload + %endif + +-pidof -c -o %PPID -x glusterd &> /dev/null +-if [ $? -eq 0 ]; then +- kill -9 `pgrep -f gsyncd.py` &> /dev/null +- +- killall --wait glusterd &> /dev/null +- glusterd --xlator-option *.upgrade=on -N +- +- #Cleaning leftover glusterd socket file which is created by glusterd in +- #rpm_script_t context. +- rm -f %{_rundir}/glusterd.socket +- +- # glusterd _was_ running, we killed it, it exited after *.upgrade=on, +- # so start it again +- %service_start glusterd +-else +- glusterd --xlator-option *.upgrade=on -N +- +- #Cleaning leftover glusterd socket file which is created by glusterd in +- #rpm_script_t context. +- rm -f %{_rundir}/glusterd.socket +-fi +-exit 0 + %endif + + ##----------------------------------------------------------------------------- +@@ -2027,6 +2005,35 @@ os.remove(tmpname) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end ++ ++%posttrans server ++pidof -c -o %PPID -x glusterd &> /dev/null ++if [ $? -eq 0 ]; then ++ kill -9 `pgrep -f gsyncd.py` &> /dev/null ++ ++ killall --wait -SIGTERM glusterd &> /dev/null ++ ++ if [ "$?" != "0" ]; then ++ echo "killall failed while killing glusterd" ++ fi ++ ++ glusterd --xlator-option *.upgrade=on -N ++ ++ #Cleaning leftover glusterd socket file which is created by glusterd in ++ #rpm_script_t context. ++ rm -rf /var/run/glusterd.socket ++ ++ # glusterd _was_ running, we killed it, it exited after *.upgrade=on, ++ # so start it again ++ %service_start glusterd ++else ++ glusterd --xlator-option *.upgrade=on -N ++ ++ #Cleaning leftover glusterd socket file which is created by glusterd in ++ #rpm_script_t context. 
++ rm -rf /var/run/glusterd.socket ++fi ++ + %endif + + %changelog +-- +1.8.3.1 + diff --git a/SOURCES/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch b/SOURCES/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch new file mode 100644 index 0000000..c00c7f4 --- /dev/null +++ b/SOURCES/0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch @@ -0,0 +1,38 @@ +From 968e5e698a070f9e6905a86c9c8338c36fcfa339 Mon Sep 17 00:00:00 2001 +From: moagrawa <moagrawa@redhat.com> +Date: Mon, 15 Jan 2018 18:21:27 +0530 +Subject: [PATCH 20/52] spec: unpackaged files found for RHEL-7 client build + +Problem: unpackages files found for RHEL-7 client build + +Solution: Update glusterfs.specs.in to exclude unpackage files +Label: DOWNSTREAM ONLY + +Change-Id: I761188a6a8447105b53bf3334ded963c645cab5b +Signed-off-by: moagrawa <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/127758 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Milind Changire <mchangir@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 208a82d..ec06176 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1141,8 +1141,10 @@ exit 0 + %exclude %{_sbindir}/gluster-setgfid2path + %exclude %{_sbindir}/glusterd + %exclude %{_sbindir}/snap_scheduler.py ++%if ( 0%{?_with_systemd:1} ) + %exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh + %exclude %{_datadir}/glusterfs/scripts/control-mem.sh ++%endif + %exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh + %exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh + %exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh +-- +1.8.3.1 + diff --git a/SOURCES/0021-cli-glusterfsd-remove-copyright-information.patch b/SOURCES/0021-cli-glusterfsd-remove-copyright-information.patch new file mode 100644 index 0000000..0aa1d07 --- /dev/null +++ b/SOURCES/0021-cli-glusterfsd-remove-copyright-information.patch @@ -0,0 +1,66 @@ +From fbc7f0e5ac8c292b865a8e02ceed2efa101d145c Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 12 Mar 2018 19:47:11 +0530 +Subject: [PATCH 21/52] cli/glusterfsd: remove copyright information + +There's no point of dumping upstream copyright information in --version. + +Label: DOWNSTREAM ONLY + +Change-Id: I3a10e30878698e1d53082936bbf22bca560a3896 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/132445 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Milind Changire <mchangir@redhat.com> +--- + cli/src/cli.c | 11 +---------- + glusterfsd/src/glusterfsd.c | 11 +---------- + 2 files changed, 2 insertions(+), 20 deletions(-) + +diff --git a/cli/src/cli.c b/cli/src/cli.c +index 84ce0f4..08f117e 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -65,16 +65,7 @@ extern int connected; + /* using argp for command line parsing */ + + const char *argp_program_version = +- "" PACKAGE_NAME " " PACKAGE_VERSION +- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION +- "\n" +- "Copyright (c) 2006-2016 Red Hat, Inc. 
" +- "<https://www.gluster.org/>\n" +- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" +- "It is licensed to you under your choice of the GNU Lesser\n" +- "General Public License, version 3 or any later version (LGPLv3\n" +- "or later), or the GNU General Public License, version 2 (GPLv2),\n" +- "in all cases as published by the Free Software Foundation."; ++ PACKAGE_NAME" "PACKAGE_VERSION; + const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">"; + + struct rpc_clnt *global_quotad_rpc; +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 5d46b3d..c983882 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -86,16 +86,7 @@ static char argp_doc[] = + "--volfile-server=SERVER [MOUNT-POINT]\n" + "--volfile=VOLFILE [MOUNT-POINT]"; + const char *argp_program_version = +- "" PACKAGE_NAME " " PACKAGE_VERSION +- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION +- "\n" +- "Copyright (c) 2006-2016 Red Hat, Inc. " +- "<https://www.gluster.org/>\n" +- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" +- "It is licensed to you under your choice of the GNU Lesser\n" +- "General Public License, version 3 or any later version (LGPLv3\n" +- "or later), or the GNU General Public License, version 2 (GPLv2),\n" +- "in all cases as published by the Free Software Foundation."; ++ PACKAGE_NAME" "PACKAGE_VERSION; + const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">"; + + static error_t +-- +1.8.3.1 + diff --git a/SOURCES/0022-cli-Remove-upstream-doc-reference.patch b/SOURCES/0022-cli-Remove-upstream-doc-reference.patch new file mode 100644 index 0000000..5f9bf28 --- /dev/null +++ b/SOURCES/0022-cli-Remove-upstream-doc-reference.patch @@ -0,0 +1,40 @@ +From 00db0c44d109e6f3e394487bf76ff28ba2eee7de Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 15 Mar 2018 12:56:02 +0530 +Subject: [PATCH 22/52] cli: Remove upstream doc reference + +...that is displayed while creating replica 2 volumes. + +Label: DOWNSTREAM ONLY + +Change-Id: I16b45c8ad3a33cdd2a464d84f51d006d8f568b23 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/132744 +Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + cli/src/cli-cmd-parser.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index f148c59..760a10c 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -606,11 +606,8 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words, + "Replica 2 volumes are prone" + " to split-brain. Use " + "Arbiter or Replica 3 to " +- "avoid this. See: " +- "http://docs.gluster.org/en/latest/" +- "Administrator%20Guide/" +- "Split%20brain%20and%20ways%20to%20deal%20with%20it/." 
+- "\nDo you still want to " ++ "avoid this.\n" ++ "Do you still want to " + "continue?\n"; + answer = cli_cmd_get_confirmation(state, question); + if (GF_ANSWER_NO == answer) { +-- +1.8.3.1 + diff --git a/SOURCES/0023-hooks-remove-selinux-hooks.patch b/SOURCES/0023-hooks-remove-selinux-hooks.patch new file mode 100644 index 0000000..3d14855 --- /dev/null +++ b/SOURCES/0023-hooks-remove-selinux-hooks.patch @@ -0,0 +1,148 @@ +From 421743b7cfa6a249544f6abb4cca5a612bd20ea1 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Tue, 11 Dec 2018 16:21:43 +0530 +Subject: [PATCH 23/52] hooks: remove selinux hooks + +Label: DOWNSTREAM ONLY + +Change-Id: I810466a0ca99ab21f5a8eac8cdffbb18333d10ad +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/135800 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Jiffin Thottan <jthottan@redhat.com> +Reviewed-by: Milind Changire <mchangir@redhat.com> +--- + configure.ac | 20 -------------------- + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/create/Makefile.am | 1 - + extras/hook-scripts/create/post/Makefile.am | 8 -------- + extras/hook-scripts/delete/Makefile.am | 1 - + extras/hook-scripts/delete/pre/Makefile.am | 8 -------- + glusterfs.spec.in | 2 -- + 7 files changed, 1 insertion(+), 41 deletions(-) + delete mode 100644 extras/hook-scripts/create/Makefile.am + delete mode 100644 extras/hook-scripts/create/post/Makefile.am + delete mode 100644 extras/hook-scripts/delete/Makefile.am + delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 2f341de..0d06f5a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -214,10 +214,6 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile +- extras/hook-scripts/create/Makefile +- extras/hook-scripts/create/post/Makefile +- extras/hook-scripts/delete/Makefile +- extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +@@ -909,21 +905,6 @@ fi + AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"]) + dnl end cloudsync section + +-dnl SELinux feature enablement +-case $host_os in +- linux*) +- AC_ARG_ENABLE([selinux], +- AC_HELP_STRING([--disable-selinux], +- [Disable SELinux features]), +- [USE_SELINUX="${enableval}"], [USE_SELINUX="yes"]) +- ;; +- *) +- USE_SELINUX=no +- ;; +-esac +-AM_CONDITIONAL(USE_SELINUX, test "x${USE_SELINUX}" = "xyes") +-dnl end of SELinux feature enablement +- + AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes]) + if test "x${have_backtrace}" = "xyes"; then + AC_DEFINE(HAVE_BACKTRACE, 1, [define if found backtrace]) +@@ -1599,7 +1580,6 @@ echo "XML output : $BUILD_XML_OUTPUT" + echo "Unit Tests : $BUILD_UNITTEST" + echo "Track priv ports : $TRACK_PRIVPORTS" + echo "POSIX ACLs : $BUILD_POSIX_ACLS" +-echo "SELinux features : $USE_SELINUX" + echo "firewalld-config : $BUILD_FIREWALLD" + echo "Events : $BUILD_EVENTS" + echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 26059d7..771b37e 100644 +--- a/extras/hook-scripts/Makefile.am ++++ b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick create delete set start stop reset ++SUBDIRS = add-brick set start stop 
reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +deleted file mode 100644 +index b083a91..0000000 +--- a/extras/hook-scripts/create/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +deleted file mode 100644 +index fd1892e..0000000 +--- a/extras/hook-scripts/create/post/Makefile.am ++++ /dev/null +@@ -1,8 +0,0 @@ +-EXTRA_DIST = S10selinux-label-brick.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ +-if WITH_SERVER +-if USE_SELINUX +-scripts_SCRIPTS = S10selinux-label-brick.sh +-endif +-endif +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +deleted file mode 100644 +index c98a05d..0000000 +--- a/extras/hook-scripts/delete/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +deleted file mode 100644 +index 4fbfbe7..0000000 +--- a/extras/hook-scripts/delete/pre/Makefile.am ++++ /dev/null +@@ -1,8 +0,0 @@ +-EXTRA_DIST = S10selinux-del-fcontext.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ +-if WITH_SERVER +-if USE_SELINUX +-scripts_SCRIPTS = S10selinux-del-fcontext.sh +-endif +-endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ec06176..db50b8e 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1413,7 +1413,6 @@ exit 0 + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1422,7 +1421,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +-- +1.8.3.1 + diff --git a/SOURCES/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch b/SOURCES/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch new file mode 100644 index 0000000..59fe63f --- /dev/null +++ b/SOURCES/0024-glusterd-Make-localtime-logging-option-invisible-in-.patch @@ -0,0 +1,50 @@ +From 79c19f0c6d02228aa8cf4b9299afeb7e0b2ad0da Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 16 Apr 2018 17:44:19 +0530 +Subject: [PATCH 24/52] glusterd: Make localtime-logging option invisible in + downstream + +Label: DOWNSTREAM ONLY + +Change-Id: Ie631edebb7e19152392bfd3c369a96e88796bd75 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/135754 +Tested-by: RHGS Build Bot 
<nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 3 ++- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index dd3f9eb..cbbb5d9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -86,7 +86,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { + * TBD: Discuss the default value for this. Maybe this should be a + * dynamic value depending on the memory specifications per node */ + {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, +- {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"}, ++ /*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/ + {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, + {NULL}, + }; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 9a6fe9f..fed2864 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2850,10 +2850,11 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "to have enabled when clients and/or bricks support " + "SELinux."}, + {.key = GLUSTERD_LOCALTIME_LOGGING_KEY, ++ /*{.key = GLUSTERD_LOCALTIME_LOGGING_KEY, + .voltype = "mgmt/glusterd", + .type = GLOBAL_DOC, + .op_version = GD_OP_VERSION_3_12_0, +- .validate_fn = validate_boolean}, ++ .validate_fn = validate_boolean},*/ + {.key = GLUSTERD_DAEMON_LOG_LEVEL_KEY, + .voltype = "mgmt/glusterd", + .type = GLOBAL_NO_DOC, +-- +1.8.3.1 + diff --git a/SOURCES/0025-build-make-RHGS-version-available-for-server.patch b/SOURCES/0025-build-make-RHGS-version-available-for-server.patch new file mode 100644 index 0000000..90f2592 --- /dev/null +++ b/SOURCES/0025-build-make-RHGS-version-available-for-server.patch @@ -0,0 +1,45 @@ +From 12ae1a9a62c2c94af44f55b03575ab8806bd22ee Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Mon, 23 Apr 2018 13:16:30 +0530 +Subject: [PATCH 25/52] build: make RHGS version available for server + +Make /usr/share/glusterfs/release available for gluserfs-server package. +This file contains the RHGS release number for the release. 
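+
+For example, once glusterfs-server is installed the release string can
+be read back directly (path assumes the default %{_datadir} of
+/usr/share; the value matches the spec change below):
+
+    cat /usr/share/glusterfs/release
+    RHGS 3.5
+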
+ +Label: DOWNSTREAM ONLY + +Change-Id: I7485f77cfb8ca7f0f8363a20124900ae9ae8a528 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/137139 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index db50b8e..bdb47ba 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -862,6 +862,10 @@ install -p -m 0744 -D extras/command-completion/gluster.bash \ + %{buildroot}%{_sysconfdir}/bash_completion.d/gluster + %endif + ++%if ( 0%{!?_without_server:1} ) ++echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release ++%endif ++ + %clean + rm -rf %{buildroot} + +@@ -1452,6 +1456,7 @@ exit 0 + + # Extra utility script + %dir %{_libexecdir}/glusterfs ++ %{_datadir}/glusterfs/release + %dir %{_datadir}/glusterfs/scripts + %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh + %if ( 0%{?_with_systemd:1} ) +-- +1.8.3.1 + diff --git a/SOURCES/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch b/SOURCES/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch new file mode 100644 index 0000000..8aa9fde --- /dev/null +++ b/SOURCES/0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch @@ -0,0 +1,68 @@ +From a3538a7d1fb7674acdf0934847f4004d8fbc4709 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Tue, 11 Dec 2018 17:57:50 +0530 +Subject: [PATCH 26/52] glusterd: Introduce daemon-log-level cluster wide + option + +This option, applicable to the node level daemons can be very helpful in +controlling the log level of these services. Please note any daemon +which is started prior to setting the specific value of this option (if +not INFO) will need to go through a restart to have this change into +effect. + +> upstream patch : https://review.gluster.org/#/c/20442/ + +Please note there's a difference in deownstream delta. The op-version +against this option is already tageed as 3_11_2 in RHGS 3.3.1 and hence +the same is retained. 
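+
+For illustration only (option key as registered by glusterd; daemons
+already running must be restarted to pick up the change), the
+cluster-wide knob gated by this op-version is set on the special
+volume name "all":
+
+    gluster volume set all cluster.daemon-log-level DEBUG
+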
Marking this DOWNSTREAM_ONLY label because of + +Label: DOWNSTREAM ONLY + +IMPORTANT: +This patch only sets .op_version in glusterd-volume-set.c to +GD_OP_VERSION_3_11_2 as per Atin's recommendation on +Tue, Dec 11, 2018 5:46pm IST + +>Change-Id: I7f6d2620bab2b094c737f5cc816bc093e9c9c4c9 +>fixes: bz#1597473 +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +Change-Id: I7f6d2620bab2b094c737f5cc816bc093e9c9c4c9 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/143137 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index b9da872..a278f18 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -104,6 +104,8 @@ + + #define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */ + ++#define GD_OP_VERSION_3_11_2 31102 /* Op-version for GlusterFS 3.11.2 */ ++ + #define GD_OP_VERSION_3_12_0 31200 /* Op-version for GlusterFS 3.12.0 */ + + #define GD_OP_VERSION_3_12_2 31202 /* Op-version for GlusterFS 3.12.2 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index fed2864..84f2705 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2859,7 +2859,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "mgmt/glusterd", + .type = GLOBAL_NO_DOC, + .value = "INFO", +- .op_version = GD_OP_VERSION_5_0}, ++ .op_version = GD_OP_VERSION_3_11_2}, + {.key = "debug.delay-gen", + .voltype = "debug/delay-gen", + .option = "!debug", +-- +1.8.3.1 + diff --git a/SOURCES/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch b/SOURCES/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch new file mode 100644 index 0000000..76b430c --- /dev/null +++ b/SOURCES/0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch @@ -0,0 +1,50 @@ +From 9be3c4745b161f1815f77cd19b550ac9795845f5 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 20 Sep 2018 22:01:05 +0530 +Subject: [PATCH 27/52] glusterd: change op-version of fips-mode-rchecksum + +..to GD_OP_VERSION_3_13_3 since GD_OP_VERSION_4_0_0 is not present in +rhgs-3.4.1 + +Label: DOWNSTREAM ONLY + +Change-Id: I759272748177d174b15123faffc2305f7a5ec58f +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/150714 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index a278f18..4a82889 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -118,6 +118,8 @@ + + #define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */ + ++#define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 
/* Op-version for GlusterFS 4.1.0 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 84f2705..2bd0a9c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2329,7 +2329,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .key = "storage.fips-mode-rchecksum", + .type = NO_DOC, + .voltype = "storage/posix", +- .op_version = GD_OP_VERSION_4_0_0, ++ .op_version = GD_OP_VERSION_3_13_3, + }, + { + .option = "force-create-mode", +-- +1.8.3.1 + diff --git a/SOURCES/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch b/SOURCES/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch new file mode 100644 index 0000000..b39c16b --- /dev/null +++ b/SOURCES/0028-glusterd-Reset-op-version-for-features.shard-deletio.patch @@ -0,0 +1,52 @@ +From 64ffcf770c5c0087f8937b5235ed0ad5b0efe7f2 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Wed, 12 Sep 2018 21:41:35 +0530 +Subject: [PATCH 28/52] glusterd: Reset op-version for + "features.shard-deletion-rate" + +The op-version for the "features.shard-deletion-rate" option was set to +4.2.0 in the upstream patch and backported at +e75be952569eb69325d5f505f7ab94aace31be52. +This commit reverts the op-version for this option to 3.13.3. + +Label: DOWNSTREAM ONLY + +Change-Id: Ie3d12f3119ad7a4b40d81bd8bd6ed591658e8371 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/154865 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 4a82889..4d95f75 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -120,6 +120,8 @@ + + #define GD_OP_VERSION_3_13_3 31303 /* Op-version for GlusterFS 3.13.3 */ + ++#define GD_OP_VERSION_3_13_4 31304 /* Op-version for GlusterFS 3.13.4 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 2bd0a9c..2f3271f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2552,7 +2552,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + }, + {.key = "features.shard-deletion-rate", + .voltype = "features/shard", +- .op_version = GD_OP_VERSION_5_0, ++ .op_version = GD_OP_VERSION_3_13_4, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.scrub-throttle", +-- +1.8.3.1 + diff --git a/SOURCES/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch b/SOURCES/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch new file mode 100644 index 0000000..752a81b --- /dev/null +++ b/SOURCES/0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch @@ -0,0 +1,39 @@ +From b504052d003aa41fbd44eec286d1733b6f2a168e Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Tue, 6 Nov 2018 18:44:55 +0530 +Subject: [PATCH 29/52] glusterd: Reset op-version for + "features.shard-lru-limit" + +The op-version for the 
"features.shard-lru-limit" option was set to +4.2.0 in the upstream patch and backported at +41e7e33c6512e98a1567e5a5532d3898b59cfa98 + +This commit reverts the op-version for this option to 3.13.4. + +Label: DOWNSTREAM ONLY + +Change-Id: I7d3ed6b373851267c78fc6815a83bee2c0906413 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/155127 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez <xhernandez@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 2f3271f..4bf89a6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2546,7 +2546,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + { + .key = "features.shard-lru-limit", + .voltype = "features/shard", +- .op_version = GD_OP_VERSION_5_0, ++ .op_version = GD_OP_VERSION_3_13_4, + .flags = VOLOPT_FLAG_CLIENT_OPT, + .type = NO_DOC, + }, +-- +1.8.3.1 + diff --git a/SOURCES/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch b/SOURCES/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch new file mode 100644 index 0000000..b50236d --- /dev/null +++ b/SOURCES/0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch @@ -0,0 +1,42 @@ +From 1d2d29396ee25f09c7d379a992ac9bd244e89c39 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Thu, 13 Dec 2018 14:28:57 +0530 +Subject: [PATCH 30/52] selinux/glusterd : add "features.selinux" to + glusterd-volume-set.c + +updates: #593 +Change-Id: I38675ba4d47c8ba7f94cfb4734692683ddb3dcfd +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 4bf89a6..11265bf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1203,10 +1203,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "performance/io-threads", + .option = "pass-through", + .op_version = GD_OP_VERSION_4_1_0}, +- {.key = "performance.least-rate-limit", +- .voltype = "performance/io-threads", +- .op_version = 1 +- }, ++ {.key = "performance.least-rate-limit", ++ .voltype = "performance/io-threads", ++ .op_version = 1}, + + /* Other perf xlators' options */ + {.key = "performance.io-cache-pass-through", +@@ -2849,7 +2848,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "trusted.gluster.selinux on the bricks. 
Recommended " + "to have enabled when clients and/or bricks support " + "SELinux."}, +- {.key = GLUSTERD_LOCALTIME_LOGGING_KEY, + /*{.key = GLUSTERD_LOCALTIME_LOGGING_KEY, + .voltype = "mgmt/glusterd", + .type = GLOBAL_DOC, +-- +1.8.3.1 + diff --git a/SOURCES/0031-glusterd-turn-off-selinux-feature-in-downstream.patch b/SOURCES/0031-glusterd-turn-off-selinux-feature-in-downstream.patch new file mode 100644 index 0000000..a7e1e26 --- /dev/null +++ b/SOURCES/0031-glusterd-turn-off-selinux-feature-in-downstream.patch @@ -0,0 +1,34 @@ +From c3176144e531e22bfe97d0fef3b0e3e449fb1d32 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 16 Apr 2018 13:47:12 +0530 +Subject: [PATCH 31/52] glusterd: turn off selinux feature in downstream + +In RHGS 3.4.0 selinux feature was never meant to be qualified. + +Label: DOWNSTREAM ONLY + +Change-Id: I0cd5eb5207a757c8b6ef789980c061f211410bd5 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/135716 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 11265bf..d1244e4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2842,7 +2842,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = VKEY_FEATURES_SELINUX, + .voltype = "features/selinux", + .type = NO_DOC, +- .value = "on", ++ .value = "off", + .op_version = GD_OP_VERSION_3_11_0, + .description = "Convert security.selinux xattrs to " + "trusted.gluster.selinux on the bricks. 
Recommended " +-- +1.8.3.1 + diff --git a/SOURCES/0032-glusterd-update-gd-op-version-to-3_7_0.patch b/SOURCES/0032-glusterd-update-gd-op-version-to-3_7_0.patch new file mode 100644 index 0000000..82b34e3 --- /dev/null +++ b/SOURCES/0032-glusterd-update-gd-op-version-to-3_7_0.patch @@ -0,0 +1,29 @@ +From bfa7055c3901b34a49f7933ea9edcf6465843de1 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Wed, 23 Jan 2019 14:22:00 +0530 +Subject: [PATCH 32/52] glusterd: update gd-op-version to 3_7_0 + +Label: DOWNSTREAM ONLY + +Change-Id: Ia6456134cd7e544a415692d09cd1ccbb6e02dd82 +Signed-off-by: Milind Changire <mchangir@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index 6365b6e..e20e3c4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -1174,7 +1174,7 @@ glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr) + * 'force' + */ + ret = glusterd_check_client_op_version_support( +- volname, GD_OP_VERSION_3_6_0, NULL); ++ volname, GD_OP_VERSION_3_7_0, NULL); + if (ret) { + ret = gf_asprintf(op_errstr, + "Volume %s has one or " +-- +1.8.3.1 + diff --git a/SOURCES/0033-build-add-missing-explicit-package-dependencies.patch b/SOURCES/0033-build-add-missing-explicit-package-dependencies.patch new file mode 100644 index 0000000..57c2919 --- /dev/null +++ b/SOURCES/0033-build-add-missing-explicit-package-dependencies.patch @@ -0,0 +1,83 @@ +From 52e2d75c2c8e32d2e4f69840e34d21b39279284a Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Thu, 13 Dec 2018 12:46:56 +0530 +Subject: [PATCH 33/52] build: add missing explicit package dependencies + +Add dependencies for glusterfs-libs, and other packages. +This is an Errata Tool whine. + +Label: DOWNSTREAM ONLY + +Change-Id: Ieaadb6e4ffa84d1811aa740f7891855568ecbcbb +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/158501 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index bdb47ba..9cd4372 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -323,6 +323,7 @@ and client framework. 
+ Summary: GlusterFS api library + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description api + GlusterFS is a distributed file-system capable of scaling to several +@@ -340,6 +341,7 @@ Summary: Development Libraries + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-devel%{?_isa} = %{version}-%{release} + Requires: libacl-devel ++Requires: %{name}-api%{?_isa} = %{version}-%{release} + + %description api-devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -391,6 +393,8 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + %if ( 0%{!?_without_extra_xlators:1} ) + Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release} + %endif ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++Requires: %{name}-server%{?_isa} = %{version}-%{release} + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -435,6 +439,7 @@ Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} + + Obsoletes: %{name}-client < %{version}-%{release} + Provides: %{name}-client = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description fuse + GlusterFS is a distributed file-system capable of scaling to several +@@ -459,6 +464,7 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} + + Requires: rsync + Requires: util-linux ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description geo-replication + GlusterFS is a distributed file-system capable of scaling to several +@@ -536,6 +542,7 @@ BuildRequires: libibverbs-devel + BuildRequires: librdmacm-devel >= 1.0.15 + %endif + Requires: %{name}%{?_isa} = %{version}-%{release} ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description rdma + GlusterFS is a distributed file-system capable of scaling to several +@@ -664,6 +671,7 @@ This package provides the glusterfs thin-arbiter translator. + + %package client-xlators + Summary: GlusterFS client-side translators ++Requires: %{name}-libs%{?_isa} = %{version}-%{release} + + %description client-xlators + GlusterFS is a distributed file-system capable of scaling to several +-- +1.8.3.1 + diff --git a/SOURCES/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch b/SOURCES/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch new file mode 100644 index 0000000..587a25a --- /dev/null +++ b/SOURCES/0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch @@ -0,0 +1,59 @@ +From 463a920541a7579f2407f22597e4014494422804 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Mon, 17 Dec 2018 14:07:01 +0530 +Subject: [PATCH 34/52] glusterd: introduce a new op-version for rhgs-3.4.3 + +This patch introduces a new op-version 31305 for rhgs-3.4.3 and +sets the max op-version to 31305. + +For migrating profile commands (commit e68845ff7018e5d81d7979684b18e6eda449b088) +we used GD_OP_VERSION_6_0 in upstream. we are changing +it to GD_OP_VERSION_3_13_5 here. 
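+
+For illustration only (standard gluster CLI assumed), after every node
+is upgraded the cluster can be moved to the new op-version introduced
+here:
+
+    gluster volume get all cluster.max-op-version
+    gluster volume set all cluster.op-version 31305
+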
+ +Label: DOWNSTREAM ONLY + +Change-Id: Ie3a05c70eb4e406889c468343f54e999b1218f19 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/158795 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 2 ++ + xlators/mgmt/glusterd/src/glusterd-handler.c | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 4d95f75..6642ba0 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -122,6 +122,8 @@ + + #define GD_OP_VERSION_3_13_4 31304 /* Op-version for GlusterFS 3.13.4 */ + ++#define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 387643d..de44af7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3085,12 +3085,12 @@ __glusterd_handle_cli_profile_volume(rpcsvc_request_t *req) + goto out; + } + +- if (conf->op_version < GD_OP_VERSION_6_0) { ++ if (conf->op_version < GD_OP_VERSION_3_13_5) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. Falling back " + "to op-sm framework.", +- GD_OP_VERSION_6_0); ++ GD_OP_VERSION_3_13_5); + ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str)); + glusterd_friend_sm(); + glusterd_op_sm(); +-- +1.8.3.1 + diff --git a/SOURCES/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch b/SOURCES/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch new file mode 100644 index 0000000..643ba3a --- /dev/null +++ b/SOURCES/0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch @@ -0,0 +1,41 @@ +From 254033a80d85460675c921c272fb94bb7e9f67d4 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Tue, 18 Dec 2018 17:57:25 +0530 +Subject: [PATCH 35/52] glusterd: tag rebalance mgmt_v3 command to op-version + 31305 + +In upstream migrating rebalance command is tagged to op-version 60000 +but in downstream the latest new op-version is 31305. + +Label: DOWNSTREAM ONLY + +Change-Id: I30bbad3efca29bf42b9a750581eb1aebc8a30ff9 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/158943 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index e20e3c4..ed5ded5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -573,12 +573,12 @@ __glusterd_handle_defrag_volume(rpcsvc_request_t *req) + } else + op = GD_OP_REBALANCE; + +- if (priv->op_version < GD_OP_VERSION_6_0) { ++ if (priv->op_version < GD_OP_VERSION_3_13_5) { + gf_msg_debug(this->name, 0, + "The cluster is operating at " + "version less than %d. 
Falling back " + "to op-sm framework.", +- GD_OP_VERSION_6_0); ++ GD_OP_VERSION_3_13_5); + ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg)); + glusterd_friend_sm(); + glusterd_op_sm(); +-- +1.8.3.1 + diff --git a/SOURCES/0036-build-add-conditional-dependency-on-server-for-devel.patch b/SOURCES/0036-build-add-conditional-dependency-on-server-for-devel.patch new file mode 100644 index 0000000..352078b --- /dev/null +++ b/SOURCES/0036-build-add-conditional-dependency-on-server-for-devel.patch @@ -0,0 +1,47 @@ +From d6458c40706d8886187bd9c2016087a3a1eee882 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Wed, 19 Dec 2018 13:17:42 +0530 +Subject: [PATCH 36/52] build: add conditional dependency on server for devel + +Add conditional depedency on server for glusterfs-devel + +Label: DOWNSTREAM ONLY + +Change-Id: Icc45df3db137dbc03d240c1ac774b5c8735c5f2f +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/159030 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9cd4372..9db5a34 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -394,7 +394,9 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release} + %endif + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{!?_without_server:1} ) + Requires: %{name}-server%{?_isa} = %{version}-%{release} ++%endif + + %description devel + GlusterFS is a distributed file-system capable of scaling to several +@@ -2067,6 +2069,11 @@ fi + * Thu Feb 21 2019 Jiffin Tony Thottan <jthottan@redhat.com> + - Obsoleting gluster-gnfs package + ++* Wed Dec 19 2018 Milind Changire <mchangir@redhat.com> ++- Add explicit package dependencies (#1656357) ++- Remove absolute paths from spec file (#1350745) ++- Do not package crypt.so for FIPS compliance (#1653224) ++ + * Wed Nov 28 2018 Krutika Dhananjay <kdhananj@redhat.com> + - Install /var/lib/glusterd/groups/distributed-virt by default + +-- +1.8.3.1 + diff --git a/SOURCES/0037-cli-change-the-warning-message.patch b/SOURCES/0037-cli-change-the-warning-message.patch new file mode 100644 index 0000000..e4a4544 --- /dev/null +++ b/SOURCES/0037-cli-change-the-warning-message.patch @@ -0,0 +1,35 @@ +From 7e0342e0d01204f136b0bd28931a6313ea216649 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Wed, 6 Feb 2019 19:06:45 +0530 +Subject: [PATCH 37/52] cli: change the warning message + +This patch changes the warning message user gets, when enabling brick +multiplexing to reflect OCS instead of CNS/CRS. 
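+
+For illustration only (cluster.brick-multiplex is a global option, so
+the special volume name "all" is used), the reworded prompt is raised
+whenever the option is toggled, and answering "n" aborts the set:
+
+    gluster volume set all cluster.brick-multiplex on
+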
+ +Label: DOWNSTREAM ONLY + +Change-Id: Id5fd87955d5a692f8e57560245f8b0cf9882e1da +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/162405 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + cli/src/cli-cmd-parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 760a10c..541dc62 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1697,7 +1697,7 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + + if ((strcmp (key, "cluster.brick-multiplex") == 0)) { + question = "Brick-multiplexing is supported only for " +- "container workloads (CNS/CRS). Also it is " ++ "OCS converged or independent mode. Also it is " + "advised to make sure that either all " + "volumes are in stopped state or no bricks " + "are running before this option is modified." +-- +1.8.3.1 + diff --git a/SOURCES/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch b/SOURCES/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch new file mode 100644 index 0000000..00a5af3 --- /dev/null +++ b/SOURCES/0038-spec-avoid-creation-of-temp-file-in-lua-script.patch @@ -0,0 +1,230 @@ +From a577dd0a3cbf435681f10d095a0dca0595c6a354 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Sat, 9 Feb 2019 14:01:28 +0530 +Subject: [PATCH 38/52] spec: avoid creation of temp file in lua script + +Avoiding creation of temporary file to execute bash shell script from a +lua scriptlet increases install time security. + +Label: DOWNSTREAM ONLY + +Change-Id: Ie5b9035f292402b18dea768aca8bc82a1e7fa615 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/162621 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 120 ++++++------------------------------------------------ + 1 file changed, 12 insertions(+), 108 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9db5a34..df8d116 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1542,15 +1542,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1584,15 +1576,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1626,15 +1610,7 @@ if [ $? 
-eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1668,15 +1644,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1709,15 +1677,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1750,15 +1710,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1792,15 +1744,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1835,15 +1779,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1878,15 +1814,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. 
+-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1921,15 +1849,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -1965,15 +1885,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +@@ -2008,15 +1920,7 @@ if [ $? -eq 0 ]; then + fi + ]] + +--- Since we run pretrans scripts only for RPMs built for a server build, +--- we can now use os.tmpname() since it is available on RHEL6 and later +--- platforms which are server platforms. +-tmpname = os.tmpname() +-tmpfile = io.open(tmpname, "w") +-tmpfile:write(script) +-tmpfile:close() +-ok, how, val = os.execute("/bin/bash " .. tmpname) +-os.remove(tmpname) ++ok, how, val = os.execute(script) + if not (ok == 0) then + error("Detected running glusterfs processes", ok) + end +-- +1.8.3.1 + diff --git a/SOURCES/0039-cli-fix-query-to-user-during-brick-mux-selection.patch b/SOURCES/0039-cli-fix-query-to-user-during-brick-mux-selection.patch new file mode 100644 index 0000000..82684cb --- /dev/null +++ b/SOURCES/0039-cli-fix-query-to-user-during-brick-mux-selection.patch @@ -0,0 +1,61 @@ +From ec707e099d4e4338d1ea21560d367b02e6339532 Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Mon, 1 Apr 2019 16:16:47 +0530 +Subject: [PATCH 39/52] cli: fix query to user during brick-mux selection + +Label: DOWNSTREAM ONLY + +Change-Id: I59472066b917ea2b23de72bcd91dc3e275d5e055 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 33 +++++++++++++++++---------------- + 1 file changed, 17 insertions(+), 16 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 541dc62..d9ccba1 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1693,23 +1693,24 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + goto out; + } + } +- } +- +- if ((strcmp (key, "cluster.brick-multiplex") == 0)) { +- question = "Brick-multiplexing is supported only for " +- "OCS converged or independent mode. Also it is " +- "advised to make sure that either all " +- "volumes are in stopped state or no bricks " +- "are running before this option is modified." 
+- "Do you still want to continue?"; + +- answer = cli_cmd_get_confirmation (state, question); +- if (GF_ANSWER_NO == answer) { +- gf_log ("cli", GF_LOG_ERROR, "Operation " +- "cancelled, exiting"); +- *op_errstr = gf_strdup ("Aborted by user."); +- ret = -1; +- goto out; ++ if ((strcmp (key, "cluster.brick-multiplex") == 0)) { ++ question = ++ "Brick-multiplexing is supported only for " ++ "OCS converged or independent mode. Also it is " ++ "advised to make sure that either all " ++ "volumes are in stopped state or no bricks " ++ "are running before this option is modified." ++ "Do you still want to continue?"; ++ ++ answer = cli_cmd_get_confirmation (state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log ("cli", GF_LOG_ERROR, "Operation " ++ "cancelled, exiting"); ++ *op_errstr = gf_strdup ("Aborted by user."); ++ ret = -1; ++ goto out; ++ } + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0040-build-Remove-unsupported-test-cases-failing-consiste.patch b/SOURCES/0040-build-Remove-unsupported-test-cases-failing-consiste.patch new file mode 100644 index 0000000..1d10507 --- /dev/null +++ b/SOURCES/0040-build-Remove-unsupported-test-cases-failing-consiste.patch @@ -0,0 +1,136 @@ +From 79c74009892804419dce264399f3fde357d5b1c3 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Tue, 2 Apr 2019 11:07:03 +0530 +Subject: [PATCH 40/52] build: Remove unsupported test cases failing + consistently + +The following two test cases failing in downstream regression runs. +Hence removing them as they are not supported downstream. + +tests/basic/cloudsync-sanity.t +tests/bugs/distribute/bug-882278.t + +Label: DOWNSTREAM ONLY + +Change-Id: Ie4b506639a017ec9910e44df1b721d9bfadf07b3 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166662 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/cloudsync-sanity.t | 22 ------------ + tests/bugs/distribute/bug-882278.t | 73 -------------------------------------- + 2 files changed, 95 deletions(-) + delete mode 100644 tests/basic/cloudsync-sanity.t + delete mode 100755 tests/bugs/distribute/bug-882278.t + +diff --git a/tests/basic/cloudsync-sanity.t b/tests/basic/cloudsync-sanity.t +deleted file mode 100644 +index 3cf719d..0000000 +--- a/tests/basic/cloudsync-sanity.t ++++ /dev/null +@@ -1,22 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../include.rc +-. $(dirname $0)/../volume.rc +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9}; +-TEST $CLI volume set $V0 features.cloudsync enable; +-TEST $CLI volume start $V0; +- +-## Mount FUSE +-TEST $GFS -s $H0 --volfile-id $V0 $M1; +- +-# This test covers lookup, mkdir, mknod, symlink, link, rename, +-# create operations +-TEST $(dirname $0)/rpc-coverage.sh $M1 +- +-cleanup; +diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t +deleted file mode 100755 +index 8cb5147..0000000 +--- a/tests/bugs/distribute/bug-882278.t ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-cleanup +- +-# Is there a good reason to require --fqdn elsewhere? It's worse than useless +-# here. +-H0=$(hostname -s) +- +-function recreate { +- # The rm is necessary so we don't get fooled by leftovers from old runs. 
+- rm -rf $1 && mkdir -p $1 +-} +- +-function count_lines { +- grep "$1" $2/* | wc -l +-} +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info; +- +-## Start and create a volume +-TEST recreate ${B0}/${V0}-0 +-TEST recreate ${B0}/${V0}-1 +-TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1} +-TEST $CLI volume set $V0 cluster.nufa on +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +- +-## Verify volume is created +-EXPECT "$V0" volinfo_field $V0 'Volume Name'; +-EXPECT 'Created' volinfo_field $V0 'Status'; +- +-## Start volume and verify +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-## Mount native +-special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1" +-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0 +- +-## Create a bunch of test files. +-for i in $(seq 0 99); do +- echo hello > $(printf $M0/file%02d $i) +-done +- +-## Make sure the files went to the right place. There might be link files in +-## the other brick, but they won't have any contents. +-EXPECT "0" count_lines hello ${B0}/${V0}-0 +-EXPECT "100" count_lines hello ${B0}/${V0}-1 +- +-if [ "$EXIT_EARLY" = "1" ]; then +- exit 0; +-fi +- +-## Finish up +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +-TEST $CLI volume stop $V0; +-EXPECT 'Stopped' volinfo_field $V0 'Status'; +- +-TEST $CLI volume delete $V0; +-TEST ! $CLI volume info $V0; +- +-cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch b/SOURCES/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch new file mode 100644 index 0000000..c1e1720 --- /dev/null +++ b/SOURCES/0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch @@ -0,0 +1,43 @@ +From c8f0ac9b429e1ff73a3e87247193c35c66212540 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Tue, 2 Apr 2019 12:06:53 +0530 +Subject: [PATCH 41/52] tests/geo-rep: Build failed in Jenkins for test + bug-1600145.t + +Problem: the ((strcmp (key, "cluster.brick-multiplex") == 0)) +comparision in cli/src/cli-cmd-parser.c is expecting +either yes or no confirmation from cli, which is not handled +in bug-1600145.t, causing test to wait till timeout and +then fail. 
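+
+For reference, a condensed view of the downstream-only prompt (added by
+patch 0039 above) that the unattended test run trips over; names are as
+in cli/src/cli-cmd-parser.c:
+
+    /* Condensed sketch: for this one key the CLI blocks on a y/n
+     * confirmation read from stdin. A scripted caller that never
+     * writes an answer hangs here until the regression framework
+     * times out. */
+    if ((strcmp (key, "cluster.brick-multiplex") == 0)) {
+            answer = cli_cmd_get_confirmation (state, question);
+            if (GF_ANSWER_NO == answer) {
+                    *op_errstr = gf_strdup ("Aborted by user.");
+                    ret = -1;
+                    goto out;
+            }
+    }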
+
+Solution: Passing yes as a pipeline to
+`gluster v set all cluster.brick-multiplex on` in bug-1600145.t
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I1a6c2a992b65380cea145fd1c46d22ec1251c77e
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166694
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ tests/00-geo-rep/bug-1600145.t | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t
+index 1d38bf9..359bc4f 100644
+--- a/tests/00-geo-rep/bug-1600145.t
++++ b/tests/00-geo-rep/bug-1600145.t
+@@ -29,7 +29,7 @@ slave_mnt=$M1
+
+ ##create_and_start_master_volume
+ TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2};
+-gluster v set all cluster.brick-multiplex on
++yes | gluster v set all cluster.brick-multiplex on
+ TEST $CLI volume start $GMV0
+
+ ##create_and_start_slave_volume
+--
+1.8.3.1
+
diff --git a/SOURCES/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch b/SOURCES/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
new file mode 100644
index 0000000..7e3d69f
--- /dev/null
+++ b/SOURCES/0042-spec-client-server-Builds-are-failing-on-rhel-6.patch
@@ -0,0 +1,123 @@
+From f25a92028ecc2018953a6375bba43a21d3a93566 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Thu, 4 Apr 2019 16:18:51 +0530
+Subject: [PATCH 42/52] spec: (client|server) Builds are failing on rhel-6
+
+Problem: 1) For the server rpm, the gluster build is throwing an error
+         undefined reference to `dlclose` on RHEL 6
+      2) For the server rpm, the build is throwing File Not found errors
+         for rot-13.so and symlink-cache.so
+      3) For the client rpms, the build is throwing File Not found errors
+         for all the files excluded in the without_server check
+
+Solution: 1) For the server rpm, add the LIB_DL link in the Makefile
+          and remove the references to rot-13.so and symlink-cache.so
+          from glusterfs.spec.in
+       2) Remove the exclude file lists, as those files are not
+          being built
+
+Label: DOWNSTREAM ONLY
+Change-Id: I2b41604cbc8525b91231b0c5caee588c5d5d6b08
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166962
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 54 -----------------------------------
+ xlators/mgmt/glusterd/src/Makefile.am | 2 +-
+ 2 files changed, 1 insertion(+), 55 deletions(-)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index df8d116..7c7f7c0 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -1097,72 +1097,18 @@ exit 0
+ %{_tmpfilesdir}/gluster.conf
+ %endif
+ %if ( 0%{?_without_extra_xlators:1} )
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13.so
+ %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+ %exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache.so
+ %endif
+ %if ( 0%{?_without_regression_tests:1} )
+ %exclude %{_datadir}/glusterfs/run-tests.sh
+ %exclude %{_datadir}/glusterfs/tests
+ %endif
+ %if 0%{?_without_server:1}
+-%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-5.8.conf
+-%exclude %{_sysconfdir}/glusterfs/gluster-rsyslog-7.2.conf
+-%exclude
%{_sysconfdir}/glusterfs/glusterd.vol +-%exclude %{_sysconfdir}/glusterfs/glusterfs-georep-logrotate +-%exclude %{_sysconfdir}/glusterfs/glusterfs-logrotate +-%exclude %{_sysconfdir}/glusterfs/group-db-workload +-%exclude %{_sysconfdir}/glusterfs/group-distributed-virt +-%exclude %{_sysconfdir}/glusterfs/group-gluster-block +-%exclude %{_sysconfdir}/glusterfs/group-metadata-cache +-%exclude %{_sysconfdir}/glusterfs/group-nl-cache +-%exclude %{_sysconfdir}/glusterfs/group-virt.example +-%exclude %{_sysconfdir}/glusterfs/logger.conf.example +-%exclude %{_sysconfdir}/rsyslog.d/gluster.conf.example +-%exclude %{_prefix}/bin/glusterfind +-%exclude %{_prefix}/lib/firewalld/services/glusterfs.xml +-%exclude %{_prefix}/lib/systemd/system/glusterd.service +-%exclude %{_prefix}/lib/systemd/system/glusterfssharedstorage.service +-%exclude %{_prefix}/lib/tmpfiles.d/gluster.conf +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix-locks.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quotad.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so +-%exclude %{_libexecdir}/glusterfs/* +-%exclude %{_sbindir}/conf.py +-%exclude %{_sbindir}/gcron.py +-%exclude %{_sbindir}/gf_attach +-%exclude %{_sbindir}/gfind_missing_files +-%exclude %{_sbindir}/glfsheal +-%exclude %{_sbindir}/gluster +-%exclude %{_sbindir}/gluster-setgfid2path +-%exclude %{_sbindir}/glusterd +-%exclude %{_sbindir}/snap_scheduler.py + %if ( 0%{?_with_systemd:1} ) + %exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh + %exclude %{_datadir}/glusterfs/scripts/control-mem.sh + %endif +-%exclude %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh +-%exclude %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh +-%exclude %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh +-%exclude %{_sharedstatedir}/glusterd/* + %endif + + %files api +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index 6d09e37..c8dd8e3 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ 
b/xlators/mgmt/glusterd/src/Makefile.am
+@@ -6,7 +6,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
+ glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \
+ -DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\"
+-glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
++glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL)
+ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+--
+1.8.3.1
+
diff --git a/SOURCES/0043-inode-don-t-dump-the-whole-table-to-CLI.patch b/SOURCES/0043-inode-don-t-dump-the-whole-table-to-CLI.patch
new file mode 100644
index 0000000..7e9d3c3
--- /dev/null
+++ b/SOURCES/0043-inode-don-t-dump-the-whole-table-to-CLI.patch
@@ -0,0 +1,137 @@
+From 416dfc70ef87400e1ddfd70e5b6e512d330b54a6 Mon Sep 17 00:00:00 2001
+From: Sheetal Pamecha <sheetal.pamecha08@gmail.com>
+Date: Tue, 2 Apr 2019 23:25:11 +0530
+Subject: [PATCH 43/52] inode: don't dump the whole table to CLI
+
+Dumping the whole inode table in detail to the screen serves no
+purpose. We should get only top-level details on the CLI; if one
+wants to debug further, they need to go to 'statedump' to get the
+full details.
+
+Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22347/
+
+BUG: 1578703
+Change-Id: Ie7e7f5a67c1606e3c18ce21ee6df6c7e4550c211
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166768
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-rpc-ops.c | 23 ++++++++++++++++++++++-
+ libglusterfs/src/inode.c | 13 +++++++++++++
+ 2 files changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 78043cd..12e7fcc 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -7606,15 +7606,24 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
++ uint32_t lru_limit = 0;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
++ snprintf(key, sizeof(key), "%s.lru_limit", prefix);
++ ret = dict_get_uint32(dict, key, &lru_limit);
++ if (ret)
++ goto out;
++ cli_out("LRU limit : %u", lru_limit);
++
+ snprintf(key, sizeof(key), "%s.active_size", prefix);
+ ret = dict_get_uint32(dict, key, &active_size);
+ if (ret)
+ goto out;
++
++#ifdef DEBUG
+ if (active_size != 0) {
+ cli_out("Active inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+@@ -7626,10 +7635,16 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ }
+ cli_out(" ");
+
++#else
++ cli_out("Active Inodes : %u", active_size);
++
++#endif
+ snprintf(key, sizeof(key), "%s.lru_size", prefix);
+ ret = dict_get_uint32(dict, key, &lru_size);
+ if (ret)
+ goto out;
++
++#ifdef DEBUG
+ if (lru_size != 0) {
+ cli_out("LRU inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+@@ -7640,11 +7655,15 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+ cli_out(" ");
++#else
++ cli_out("LRU Inodes : %u", lru_size);
++#endif
+
+ snprintf(key, sizeof(key), "%s.purge_size", prefix);
+ ret = dict_get_uint32(dict, key, &purge_size);
+ if (ret)
+ goto out;
++#ifdef DEBUG
+ if (purge_size != 0) {
+ cli_out("Purged inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+@@ -7654,7 +7673,9 @@ cli_print_volume_status_itables(dict_t *dict, char *prefix)
+ snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+-
++#else
++ cli_out("Purge Inodes : %u", purge_size);
++#endif
+ out:
+ return;
+ }
+diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
+index 87f74e0..96ddea5 100644
+--- a/libglusterfs/src/inode.c
++++ b/libglusterfs/src/inode.c
+@@ -2598,6 +2598,11 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ if (ret)
+ return;
+
++ snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix);
++ ret = dict_set_uint32(dict, key, itable->lru_limit);
++ if (ret)
++ goto out;
++
+ snprintf(key, sizeof(key), "%s.itable.active_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->active_size);
+ if (ret)
+@@ -2613,6 +2618,13 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ if (ret)
+ goto out;
+
++#ifdef DEBUG
++ /* Dumping inode details in dictionary and sending it to CLI is not
++ required as when a developer (or support team) asks for this command
++ output, they just want to get top level detail of inode table.
++ If one wants to debug, let them take statedump and debug, this
++ wouldn't be available in CLI during production setup.
++ */
+ list_for_each_entry(inode, &itable->active, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++);
+@@ -2632,6 +2644,7 @@ inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
+ snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
++#endif
+
+ out:
+ pthread_mutex_unlock(&itable->lock);
+--
+1.8.3.1
+
diff --git a/SOURCES/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch b/SOURCES/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
new file mode 100644
index 0000000..30ab28b
--- /dev/null
+++ b/SOURCES/0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch
@@ -0,0 +1,360 @@
+From bc6588890ce94101a63b861178cf38db5549d8a8 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Wed, 28 Nov 2018 11:22:52 +0530
+Subject: [PATCH 44/52] cluster/ec: Don't enqueue an entry if it is already
+ healing
+
+Problem:
+1 - heal-wait-qlength is 128 by default. If shd is disabled
+and we need to heal files, client side heal is needed.
+If we access these files, that will trigger the heal.
+However, it has been observed that a file will be enqueued
+multiple times in the heal wait queue, which in turn causes
+the queue to fill up and prevents other files from being enqueued.
+
+2 - While a file is going through healing and a write fop from
+mount comes on that file, it sends the write on all the bricks including
+the healing one. At the end it updates version and size on all the
+bricks. However, it does not unset the dirty flag on all the bricks,
+even if this write fop was successful on all the bricks.
+After healing completion this dirty flag remains set and never
+gets cleaned up if SHD is disabled.
+
+Solution:
+1 - If an entry is already in the queue or going through the heal process,
+don't enqueue the next client side request to heal the same file.
+
+2 - Unset dirty on all the bricks at the end if the fop has succeeded on
+all the bricks, even if some of the bricks are going through heal.
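+
+A condensed sketch of solution 1 as it lands in ec_heal_throttle() in
+the diff below; the per-inode heal_count gates the enqueue:
+
+    /* Condensed from the diff below: enqueue only if this inode is not
+     * already queued or being healed; otherwise release the duplicate
+     * request instead of queueing it again. */
+    if (!ec_is_entry_healing(fop)) {        /* ctx->heal_count == 0 */
+            list_add_tail(&fop->healer, &ec->heal_waiting);
+            ec->heal_waiters++;
+            ec_set_entry_healing(fop);      /* ctx->heal_count += 1 */
+    } else {
+            fop_rel = fop;                  /* duplicate: completed later
+                                             * via ec_heal_done() */
+    }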
+ +backport of : https://review.gluster.org/#/c/glusterfs/+/21744/ + +Change-Id: Ia61ffe230c6502ce6cb934425d55e2f40dd1a727 +BUG: 1600918 +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166296 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/ec/bug-1236065.t | 1 - + xlators/cluster/ec/src/ec-common.c | 43 +++++++++------ + xlators/cluster/ec/src/ec-common.h | 8 +++ + xlators/cluster/ec/src/ec-heal.c | 104 +++++++++++++++++++++++++++++++----- + xlators/cluster/ec/src/ec-helpers.c | 1 + + xlators/cluster/ec/src/ec-types.h | 1 + + 6 files changed, 127 insertions(+), 31 deletions(-) + +diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t +index 76d25d7..9181e73 100644 +--- a/tests/bugs/ec/bug-1236065.t ++++ b/tests/bugs/ec/bug-1236065.t +@@ -85,7 +85,6 @@ TEST pidof glusterd + EXPECT "$V0" volinfo_field $V0 'Volume Name' + EXPECT 'Started' volinfo_field $V0 'Status' + EXPECT '7' online_brick_count +- + ## cleanup + cd + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 8d65670..5183680 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -313,14 +313,15 @@ ec_check_status(ec_fop_data_t *fop) + + gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS, + "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, " +- "remaining=%s, good=%s, bad=%s)", ++ "remaining=%s, good=%s, bad=%s, %s)", + gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes, + ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes), + ec_bin(str2, sizeof(str2), fop->mask, ec->nodes), + ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes), + ec_bin(str4, sizeof(str4), fop->good, ec->nodes), + ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good), +- ec->nodes)); ++ ec->nodes), ++ ec_msg_str(fop)); + if (fop->use_fd) { + if (fop->fd != NULL) { + ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL, +@@ -2371,37 +2372,47 @@ ec_update_info(ec_lock_link_t *link) + uint64_t dirty[2] = {0, 0}; + uint64_t size; + ec_t *ec = NULL; ++ uintptr_t mask; + + lock = link->lock; + ctx = lock->ctx; + ec = link->fop->xl->private; + + /* pre_version[*] will be 0 if have_version is false */ +- version[0] = ctx->post_version[0] - ctx->pre_version[0]; +- version[1] = ctx->post_version[1] - ctx->pre_version[1]; ++ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] - ++ ctx->pre_version[EC_DATA_TXN]; ++ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] - ++ ctx->pre_version[EC_METADATA_TXN]; + + size = ctx->post_size - ctx->pre_size; + /* If we set the dirty flag for update fop, we have to unset it. + * If fop has failed on some bricks, leave the dirty as marked. 
*/ ++ + if (lock->unlock_now) { ++ if (version[EC_DATA_TXN]) { ++ /*A data fop will have difference in post and pre version ++ *and for data fop we send writes on healing bricks also */ ++ mask = lock->good_mask | lock->healing; ++ } else { ++ mask = lock->good_mask; ++ } + /* Ensure that nodes are up while doing final + * metadata update.*/ +- if (!(ec->node_mask & ~lock->good_mask) && +- !(ec->node_mask & ~ec->xl_up)) { +- if (ctx->dirty[0] != 0) { +- dirty[0] = -1; ++ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) { ++ if (ctx->dirty[EC_DATA_TXN] != 0) { ++ dirty[EC_DATA_TXN] = -1; + } +- if (ctx->dirty[1] != 0) { +- dirty[1] = -1; ++ if (ctx->dirty[EC_METADATA_TXN] != 0) { ++ dirty[EC_METADATA_TXN] = -1; + } + /*If everything is fine and we already + *have version xattr set on entry, there + *is no need to update version again*/ +- if (ctx->pre_version[0]) { +- version[0] = 0; ++ if (ctx->pre_version[EC_DATA_TXN]) { ++ version[EC_DATA_TXN] = 0; + } +- if (ctx->pre_version[1]) { +- version[1] = 0; ++ if (ctx->pre_version[EC_METADATA_TXN]) { ++ version[EC_METADATA_TXN] = 0; + } + } else { + link->optimistic_changelog = _gf_false; +@@ -2410,8 +2421,8 @@ ec_update_info(ec_lock_link_t *link) + memset(ctx->dirty, 0, sizeof(ctx->dirty)); + } + +- if ((version[0] != 0) || (version[1] != 0) || (dirty[0] != 0) || +- (dirty[1] != 0)) { ++ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) || ++ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) { + ec_update_size_version(link, version, size, dirty); + return _gf_true; + } +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index 115e147..54aaa77 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -190,4 +190,12 @@ ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, + void + ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index, + int32_t ret_status); ++gf_boolean_t ++ec_is_entry_healing(ec_fop_data_t *fop); ++void ++ec_set_entry_healing(ec_fop_data_t *fop); ++void ++ec_reset_entry_healing(ec_fop_data_t *fop); ++char * ++ec_msg_str(ec_fop_data_t *fop); + #endif /* __EC_COMMON_H__ */ +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index eaf80e0..1ca12c1 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -103,6 +103,48 @@ ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata) + } + /* FOP: heal */ + ++void ++ec_set_entry_healing(ec_fop_data_t *fop) ++{ ++ ec_inode_t *ctx = NULL; ++ loc_t *loc = NULL; ++ ++ if (!fop) ++ return; ++ ++ loc = &fop->loc[0]; ++ LOCK(&loc->inode->lock); ++ { ++ ctx = __ec_inode_get(loc->inode, fop->xl); ++ if (ctx) { ++ ctx->heal_count += 1; ++ } ++ } ++ UNLOCK(&loc->inode->lock); ++} ++ ++void ++ec_reset_entry_healing(ec_fop_data_t *fop) ++{ ++ ec_inode_t *ctx = NULL; ++ loc_t *loc = NULL; ++ int32_t heal_count = 0; ++ if (!fop) ++ return; ++ ++ loc = &fop->loc[0]; ++ LOCK(&loc->inode->lock); ++ { ++ ctx = __ec_inode_get(loc->inode, fop->xl); ++ if (ctx) { ++ ctx->heal_count += -1; ++ heal_count = ctx->heal_count; ++ } ++ } ++ UNLOCK(&loc->inode->lock); ++ GF_ASSERT(heal_count >= 0); ++} ++ + uintptr_t + ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood) + { +@@ -2507,17 +2549,6 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); + goto out; + } +- +- msources = alloca0(ec->nodes); +- mhealed_sinks = 
alloca0(ec->nodes); +- ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); +- if (ret == 0) { +- mgood = ec_char_array_to_mask(msources, ec->nodes); +- mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); +- } else { +- op_ret = -1; +- op_errno = -ret; +- } + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + if (IA_ISREG(loc->inode->ia_type)) { +@@ -2538,8 +2569,19 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + op_ret = -1; + op_errno = -ret; + } ++ msources = alloca0(ec->nodes); ++ mhealed_sinks = alloca0(ec->nodes); ++ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks); ++ if (ret == 0) { ++ mgood = ec_char_array_to_mask(msources, ec->nodes); ++ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes); ++ } else { ++ op_ret = -1; ++ op_errno = -ret; ++ } + + out: ++ ec_reset_entry_healing(fop); + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), +@@ -2650,11 +2692,33 @@ ec_handle_healers_done(ec_fop_data_t *fop) + ec_launch_heal(ec, heal_fop); + } + ++gf_boolean_t ++ec_is_entry_healing(ec_fop_data_t *fop) ++{ ++ ec_inode_t *ctx = NULL; ++ int32_t heal_count = 0; ++ loc_t *loc = NULL; ++ ++ loc = &fop->loc[0]; ++ ++ LOCK(&loc->inode->lock); ++ { ++ ctx = __ec_inode_get(loc->inode, fop->xl); ++ if (ctx) { ++ heal_count = ctx->heal_count; ++ } ++ } ++ UNLOCK(&loc->inode->lock); ++ GF_ASSERT(heal_count >= 0); ++ return heal_count; ++} ++ + void + ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + { + gf_boolean_t can_heal = _gf_true; + ec_t *ec = this->private; ++ ec_fop_data_t *fop_rel = NULL; + + if (fop->req_frame == NULL) { + LOCK(&ec->lock); +@@ -2662,8 +2726,13 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + if ((ec->background_heals > 0) && + (ec->heal_wait_qlen + ec->background_heals) > + (ec->heal_waiters + ec->healers)) { +- list_add_tail(&fop->healer, &ec->heal_waiting); +- ec->heal_waiters++; ++ if (!ec_is_entry_healing(fop)) { ++ list_add_tail(&fop->healer, &ec->heal_waiting); ++ ec->heal_waiters++; ++ ec_set_entry_healing(fop); ++ } else { ++ fop_rel = fop; ++ } + fop = __ec_dequeue_heals(ec); + } else { + can_heal = _gf_false; +@@ -2673,8 +2742,12 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + } + + if (can_heal) { +- if (fop) ++ if (fop) { ++ if (fop->req_frame != NULL) { ++ ec_set_entry_healing(fop); ++ } + ec_launch_heal(ec, fop); ++ } + } else { + gf_msg_debug(this->name, 0, + "Max number of heals are " +@@ -2682,6 +2755,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + ec_fop_set_error(fop, EBUSY); + ec_heal_fail(ec, fop); + } ++ if (fop_rel) { ++ ec_heal_done(0, NULL, fop_rel); ++ } + } + + void +diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c +index e6b0359..43f6e3b 100644 +--- a/xlators/cluster/ec/src/ec-helpers.c ++++ b/xlators/cluster/ec/src/ec-helpers.c +@@ -717,6 +717,7 @@ __ec_inode_get(inode_t *inode, xlator_t *xl) + memset(ctx, 0, sizeof(*ctx)); + INIT_LIST_HEAD(&ctx->heal); + INIT_LIST_HEAD(&ctx->stripe_cache.lru); ++ ctx->heal_count = 0; + value = (uint64_t)(uintptr_t)ctx; + if (__inode_ctx_set(inode, xl, &value) != 0) { + GF_FREE(ctx); +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f3d63ca..6ae4a2b 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -171,6 +171,7 @@ struct _ec_inode { + gf_boolean_t have_config; + gf_boolean_t 
have_version; + gf_boolean_t have_size; ++ int32_t heal_count; + ec_config_t config; + uint64_t pre_version[2]; + uint64_t post_version[2]; +-- +1.8.3.1 + diff --git a/SOURCES/0045-glusterd-fix-txn-id-mem-leak.patch b/SOURCES/0045-glusterd-fix-txn-id-mem-leak.patch new file mode 100644 index 0000000..b9b2b3e --- /dev/null +++ b/SOURCES/0045-glusterd-fix-txn-id-mem-leak.patch @@ -0,0 +1,126 @@ +From 6c004c6c8b8f98f56e186740881520b8364e6f85 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 18 Mar 2019 16:08:04 +0530 +Subject: [PATCH 45/52] glusterd: fix txn-id mem leak + +This commit ensures the following: +1. Don't send commit op request to the remote nodes when gluster v +status all is executed as for the status all transaction the local +commit gets the name of the volumes and remote commit ops are +technically a no-op. So no need for additional rpc requests. +2. In op state machine flow, if the transaction is in staged state and +op_info.skip_locking is true, then no need to set the txn id in the +priv->glusterd_txn_opinfo dictionary which never gets freed. + +> Fixes: bz#1691164 +> Change-Id: Ib6a9300ea29633f501abac2ba53fb72ff648c822 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22388/ + +BUG: 1670415 +Change-Id: Ib6a9300ea29633f501abac2ba53fb72ff648c822 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166449 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 26 ++++++++++++++++++++------ + xlators/mgmt/glusterd/src/glusterd-syncop.c | 16 ++++++++++++++++ + 2 files changed, 36 insertions(+), 6 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index cbbb5d9..12d857a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -5652,6 +5652,9 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx) + dict_t *dict = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; ++ glusterd_op_info_t txn_op_info = { ++ {0}, ++ }; + + this = THIS; + GF_ASSERT(this); +@@ -5686,6 +5689,7 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx) + ret = -1; + goto out; + } ++ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info); + + ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id)); + if (ret) { +@@ -5704,6 +5708,12 @@ out: + + gf_msg_debug(this->name, 0, "Returning with %d", ret); + ++ /* for no volname transactions, the txn_opinfo needs to be cleaned up ++ * as there's no unlock event triggered ++ */ ++ if (txn_op_info.skip_locking) ++ ret = glusterd_clear_txn_opinfo(txn_id); ++ + if (rsp_dict) + dict_unref(rsp_dict); + +@@ -8159,12 +8169,16 @@ glusterd_op_sm() + "Unable to clear " + "transaction's opinfo"); + } else { +- ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, +- GD_MSG_TRANS_OPINFO_SET_FAIL, +- "Unable to set " +- "transaction's opinfo"); ++ if (!(event_type == GD_OP_EVENT_STAGE_OP && ++ opinfo.state.state == GD_OP_STATE_STAGED && ++ opinfo.skip_locking)) { ++ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_TRANS_OPINFO_SET_FAIL, ++ "Unable to set " ++ "transaction's opinfo"); ++ } + } + + 
glusterd_destroy_op_event_ctx(event);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index 1741cf8..618d8bc 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1392,6 +1392,8 @@ gd_commit_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char *errstr = NULL;
+ struct syncargs args = {0};
+ int type = GF_QUOTA_OPTION_TYPE_NONE;
++ uint32_t cmd = 0;
++ gf_boolean_t origin_glusterd = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+@@ -1449,6 +1451,20 @@ commit_done:
+ gd_syncargs_init(&args, op_ctx);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
++ origin_glusterd = is_origin_glusterd(req_dict);
++
++ if (op == GD_OP_STATUS_VOLUME) {
++ ret = dict_get_uint32(req_dict, "cmd", &cmd);
++ if (ret)
++ goto out;
++
++ if (origin_glusterd) {
++ if ((cmd & GF_CLI_STATUS_ALL)) {
++ ret = 0;
++ goto out;
++ }
++ }
++ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+--
+1.8.3.1
+
diff --git a/SOURCES/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch b/SOURCES/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch
new file mode 100644
index 0000000..5365515
--- /dev/null
+++ b/SOURCES/0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch
@@ -0,0 +1,98 @@
+From a0661449cd8ba7b851fec473191733767f4541b8 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Thu, 28 Mar 2019 17:55:54 +0530
+Subject: [PATCH 46/52] protocol/client: Do not fallback to anon-fd if fd is
+ not open
+
+If an open comes on a file when a brick is down, and a fop then comes
+on that fd after the brick comes back up, the client xlator would still
+wind the fop on the anon-fd, leading to wrong behavior of the fops in
+some cases.
+
+Example:
+If an lk fop is issued on the fd just after the brick is up in the scenario above,
+the lk fop will be sent on the anon-fd instead of failing it on that client xlator.
+This lock will never be freed upon close of the fd, as flush on an anon-fd is
+invalid and is not wound below the server xlator.
+
+As a fix, fail the fop unless the fd has the FALLBACK_TO_ANON_FD flag.
+
+ >Upstream-patch: https://review.gluster.org/c/glusterfs/+/15804
+ >Change-Id: I77692d056660b2858e323bdabdfe0a381807cccc
+ >fixes bz#1390914
+
+BUG: 1695057
+Change-Id: Id656bea8dde14327212fbe7ecc97519dc5b32098
+Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/166833
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/protocol/bug-1390914.t | 36 ++++++++++++++++++++++
+ xlators/protocol/client/src/client-helpers.c | 8 ++++++-
+ 2 files changed, 43 insertions(+), 1 deletion(-)
+ create mode 100644 tests/bugs/protocol/bug-1390914.t
+
+diff --git a/tests/bugs/protocol/bug-1390914.t b/tests/bugs/protocol/bug-1390914.t
+new file mode 100644
+index 0000000..e3dab92
+--- /dev/null
++++ b/tests/bugs/protocol/bug-1390914.t
+@@ -0,0 +1,36 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../fileio.rc
++cleanup;
++
++#test that fops are not wound on anon-fd when fd is not open on that brick
++TEST glusterd;
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
++TEST $CLI volume set $V0 performance.open-behind off
++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
++TEST $CLI volume heal $V0 disable
++TEST $CLI volume start $V0
++TEST $CLI volume profile $V0 start
++TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
++
++TEST touch $M0/1
++TEST kill_brick $V0 $H0 $B0/${V0}1
++TEST fd_open 200 'w' "$M0/1"
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++
++#lk should only happen on 2 bricks, if there is a bug, it will plant a lock
++#with anon-fd on first-brick which will never be released because flush won't
++#be wound below server xlator for anon-fd
++TEST flock -x -n 200
++TEST fd_close 200
++
++TEST fd_open 200 'w' "$M0/1"
++#this lock will fail if there is a stale lock
++TEST flock -x -n 200
++TEST fd_close 200
++cleanup;
+diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
+index 55e87b3..2dd7106 100644
+--- a/xlators/protocol/client/src/client-helpers.c
++++ b/xlators/protocol/client/src/client-helpers.c
+@@ -419,7 +419,13 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+ {
+ fdctx = this_fd_get_ctx(fd, this);
+ if (!fdctx) {
+- *remote_fd = GF_ANON_FD_NO;
++ if (fd->anonymous) {
++ *remote_fd = GF_ANON_FD_NO;
++ } else {
++ *remote_fd = -1;
++ gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s",
++ uuid_utoa(fd->inode->gfid));
++ }
+ } else {
+ if (__is_fd_reopen_in_progress(fdctx))
+ *remote_fd = -1;
+--
+1.8.3.1
+
diff --git a/SOURCES/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch b/SOURCES/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
new file mode 100644
index 0000000..cc71864
--- /dev/null
+++ b/SOURCES/0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch
@@ -0,0 +1,1652 @@
+From 83d816370f7540d4065baac704df65c648a03125 Mon Sep 17 00:00:00 2001
+From: Poornima G <pgurusid@redhat.com>
+Date: Sun, 24 Mar 2019 09:40:50 +0530
+Subject: [PATCH 47/52] client-rpc: Fix the payload being sent on the wire
+
+The fops allocate 3 kinds of payload (buffers) in the client xlator:
+- fop payload, this is the buffer allocated by the write and put fops
+- rsphdr payload, this is the buffer required by the reply cbk of
+ some fops like lookup, readdir.
+- rsp_payload, this is the buffer required by the reply cbk of fops like
+ readv etc.
+
+Currently, in the lookup and readdir fops the rsphdr is sent as payload,
+hence the allocated rsphdr buffer is also sent on the wire, increasing
+the bandwidth consumption on the wire.
+
+With this patch, the issue is fixed.
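+
+The fix passes all three buffer kinds through a single client_payload_t
+argument, so only the request payload is handed to the transport as
+wire payload while the reply buffers stay local. A sketch reconstructed
+from the cp.* fields used in the diff below (the actual definition
+lives in client.h):
+
+    /* Sketch only: field names as used via 'cp.*' in this patch. */
+    typedef struct client_payload {
+            struct iobref *iobref;      /* ref for the request payload */
+            struct iovec *payload;      /* request payload (writev), the
+                                         * only iovec sent on the wire */
+            int payload_cnt;
+            struct iovec *rsphdr;       /* reply-header receive buffer
+                                         * (lookup, readdir) */
+            int rsphdr_cnt;
+            struct iovec *rsp_payload;  /* reply-data receive buffer
+                                         * (readv) */
+            int rsp_payload_cnt;
+            struct iobref *rsp_iobref;  /* ref for the receive buffers */
+    } client_payload_t;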
+ +>Fixes: bz#1692093 +>Change-Id: Ie8158921f4db319e60ad5f52d851fa5c9d4a269b +>Signed-off-by: Poornima G <pgurusid@redhat.com> +>Backport-of: https://review.gluster.org/22402/ + +BUG: 1693935 +Change-Id: Id12746a4c9416288bc1387c8b018bbe9cc4b637d +Signed-off-by: Poornima G <pgurusid@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166535 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/protocol/client/src/client-handshake.c | 29 ++- + xlators/protocol/client/src/client-helpers.c | 14 +- + xlators/protocol/client/src/client-rpc-fops.c | 235 ++++++++++++---------- + xlators/protocol/client/src/client-rpc-fops_v2.c | 236 ++++++++++++----------- + xlators/protocol/client/src/client.c | 22 ++- + xlators/protocol/client/src/client.h | 16 +- + 6 files changed, 308 insertions(+), 244 deletions(-) + +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index f9631c5..c43756a 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -34,7 +34,6 @@ typedef struct client_fd_lk_local { + clnt_fd_ctx_t *fdctx; + } clnt_fd_lk_local_t; + +- + int32_t + client3_getspec(call_frame_t *frame, xlator_t *this, void *data) + { +@@ -201,8 +200,8 @@ clnt_release_reopen_fd(xlator_t *this, clnt_fd_ctx_t *fdctx) + req.fd = fdctx->remote_fd; + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RELEASE, +- clnt_release_reopen_fd_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfs3_releasedir_req); ++ clnt_release_reopen_fd_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_releasedir_req); + out: + if (ret) { + clnt_fd_lk_reacquire_failed(this, fdctx, conf); +@@ -486,8 +485,8 @@ protocol_client_reopendir(clnt_fd_ctx_t *fdctx, xlator_t *this) + frame->local = local; + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client3_3_reopendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_opendir_req); ++ client3_3_reopendir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-opendir request"); +@@ -547,8 +546,8 @@ protocol_client_reopenfile(clnt_fd_ctx_t *fdctx, xlator_t *this) + local->loc.path); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client3_3_reopen_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_open_req); ++ client3_3_reopen_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_open_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-open request"); +@@ -745,8 +744,8 @@ protocol_client_reopendir_v2(clnt_fd_ctx_t *fdctx, xlator_t *this) + frame->local = local; + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client4_0_reopendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_opendir_req); ++ client4_0_reopendir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-opendir request"); +@@ -806,8 +805,8 @@ protocol_client_reopenfile_v2(clnt_fd_ctx_t *fdctx, xlator_t *this) + local->loc.path); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client4_0_reopen_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_open_req); ++ client4_0_reopen_cbk, NULL, ++ (xdrproc_t)xdr_gfx_open_req); + if (ret) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, PC_MSG_DIR_OP_FAILED, + "failed to send the re-open request"); +@@ -1312,7 +1311,6 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc) + + ret = client_submit_request(this, &req, fr, conf->handshake, + GF_HNDSK_SETVOLUME, client_setvolume_cbk, NULL, +- NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gf_setvolume_req); + + fail: +@@ -1522,8 +1520,7 @@ client_query_portmap(xlator_t *this, struct rpc_clnt *rpc) + + ret = client_submit_request(this, &req, fr, &clnt_pmap_prog, + GF_PMAP_PORTBYBRICK, client_query_portmap_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_pmap_port_by_brick_req); ++ NULL, (xdrproc_t)xdr_pmap_port_by_brick_req); + + fail: + return ret; +@@ -1624,8 +1621,8 @@ client_handshake(xlator_t *this, struct rpc_clnt *rpc) + + req.gfs_id = 0xbabe; + ret = client_submit_request(this, &req, frame, conf->dump, GF_DUMP_DUMP, +- client_dump_version_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gf_dump_req); ++ client_dump_version_cbk, NULL, ++ (xdrproc_t)xdr_gf_dump_req); + + out: + return ret; +diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c +index 2dd7106..53b4484 100644 +--- a/xlators/protocol/client/src/client-helpers.c ++++ b/xlators/protocol/client/src/client-helpers.c +@@ -3082,8 +3082,7 @@ send_release4_0_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + gf_msg_trace(this->name, 0, "sending releasedir on fd"); + (void)client_submit_request( + this, &req, fr, conf->fops, GFS3_OP_RELEASEDIR, +- client4_0_releasedir_cbk, NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_releasedir_req); ++ client4_0_releasedir_cbk, NULL, (xdrproc_t)xdr_gfx_releasedir_req); + } else { + gfx_release_req req = { + { +@@ -3094,8 +3093,8 @@ send_release4_0_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + req.fd = fdctx->remote_fd; + gf_msg_trace(this->name, 0, "sending release on fd"); + (void)client_submit_request(this, &req, fr, conf->fops, GFS3_OP_RELEASE, +- client4_0_release_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfx_release_req); ++ client4_0_release_cbk, NULL, ++ (xdrproc_t)xdr_gfx_release_req); + } + + return 0; +@@ -3118,8 +3117,7 @@ send_release3_3_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + gf_msg_trace(this->name, 0, "sending releasedir on fd"); + (void)client_submit_request( + this, &req, fr, conf->fops, GFS3_OP_RELEASEDIR, +- client3_3_releasedir_cbk, NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_releasedir_req); ++ client3_3_releasedir_cbk, NULL, (xdrproc_t)xdr_gfs3_releasedir_req); + } else { + gfs3_release_req req = { + { +@@ -3130,8 +3128,8 @@ send_release3_3_over_wire(xlator_t *this, clnt_fd_ctx_t *fdctx, + req.fd = fdctx->remote_fd; + gf_msg_trace(this->name, 0, "sending release on fd"); + (void)client_submit_request(this, &req, fr, conf->fops, GFS3_OP_RELEASE, +- client3_3_release_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfs3_release_req); ++ client3_3_release_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_release_req); + } + + return 0; +diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c +index b7df7cc..1c8b31b 100644 +--- a/xlators/protocol/client/src/client-rpc-fops.c ++++ b/xlators/protocol/client/src/client-rpc-fops.c +@@ -3234,11 +3234,13 @@ client3_3_lookup(call_frame_t *frame, xlator_t *this, void *data) + struct iobref *rsp_iobref = NULL; + struct iobuf *rsp_iobuf = NULL; + struct iovec *rsphdr = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + + memset(vector, 0, 
sizeof(vector)); ++ memset(&cp, 0, sizeof(client_payload_t)); + + conf = this->private; + args = data; +@@ -3288,9 +3290,12 @@ client3_3_lookup(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LOOKUP, +- client3_3_lookup_cbk, NULL, rsphdr, count, NULL, +- 0, local->iobref, ++ client3_3_lookup_cbk, &cp, + (xdrproc_t)xdr_gfs3_lookup_req); + + if (ret) { +@@ -3338,8 +3343,8 @@ client3_3_stat(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STAT, +- client3_3_stat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_stat_req); ++ client3_3_stat_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_stat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3381,8 +3386,8 @@ client3_3_truncate(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_TRUNCATE, +- client3_3_truncate_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_truncate_req); ++ client3_3_truncate_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_truncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3425,8 +3430,7 @@ client3_3_ftruncate(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FTRUNCATE, client3_3_ftruncate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_ftruncate_req); ++ NULL, (xdrproc_t)xdr_gfs3_ftruncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3468,8 +3472,8 @@ client3_3_access(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ACCESS, +- client3_3_access_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_access_req); ++ client3_3_access_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_access_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3505,10 +3509,12 @@ client3_3_readlink(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + + conf = this->private; +@@ -3547,9 +3553,11 @@ client3_3_readlink(call_frame_t *frame, xlator_t *this, void *data) + rsp_iobuf = NULL; + rsp_iobref = NULL; + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READLINK, +- client3_3_readlink_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_readlink_cbk, &cp, + (xdrproc_t)xdr_gfs3_readlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -3595,8 +3603,8 @@ client3_3_unlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_UNLINK, +- client3_3_unlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_unlink_req); ++ client3_3_unlink_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_unlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, 
PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3638,8 +3646,8 @@ client3_3_rmdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RMDIR, +- client3_3_rmdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_rmdir_req); ++ client3_3_rmdir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_rmdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3697,8 +3705,8 @@ client3_3_symlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SYMLINK, +- client3_3_symlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_symlink_req); ++ client3_3_symlink_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_symlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3743,8 +3751,8 @@ client3_3_rename(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RENAME, +- client3_3_rename_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_rename_req); ++ client3_3_rename_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_rename_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3802,8 +3810,8 @@ client3_3_link(call_frame_t *frame, xlator_t *this, void *data) + loc_path(&local->loc2, NULL); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LINK, +- client3_3_link_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_link_req); ++ client3_3_link_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_link_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3857,8 +3865,8 @@ client3_3_mknod(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKNOD, +- client3_3_mknod_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_mknod_req); ++ client3_3_mknod_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_mknod_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3924,8 +3932,8 @@ client3_3_mkdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKDIR, +- client3_3_mkdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_mkdir_req); ++ client3_3_mkdir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_mkdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3982,8 +3990,8 @@ client3_3_create(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_CREATE, +- client3_3_create_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_create_req); ++ client3_3_create_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_create_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4043,8 +4051,8 @@ client3_3_open(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client3_3_open_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_open_req); ++ client3_3_open_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_open_req); + if (ret) { + gf_msg(this->name, 
GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4079,10 +4087,12 @@ client3_3_readv(call_frame_t *frame, xlator_t *this, void *data) + }; + struct iobuf *rsp_iobuf = NULL; + struct iobref *rsp_iobref = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4130,9 +4140,12 @@ client3_3_readv(call_frame_t *frame, xlator_t *this, void *data) + local->iobref = rsp_iobref; + rsp_iobref = NULL; + ++ cp.rsp_payload = &rsp_vec; ++ cp.rsp_payload_cnt = 1; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ, +- client3_3_readv_cbk, NULL, NULL, 0, &rsp_vec, 1, +- local->iobref, (xdrproc_t)xdr_gfs3_read_req); ++ client3_3_readv_cbk, &cp, ++ (xdrproc_t)xdr_gfs3_read_req); + if (ret) { + // unwind is done in the cbk + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4167,10 +4180,12 @@ client3_3_writev(call_frame_t *frame, xlator_t *this, void *data) + }; + int op_errno = ESTALE; + int ret = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4187,9 +4202,12 @@ client3_3_writev(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.iobref = args->iobref; ++ cp.payload = args->vector; ++ cp.payload_cnt = args->count; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE, +- client3_3_writev_cbk, args->iobref, +- args->vector, args->count, NULL, 0, NULL, ++ client3_3_writev_cbk, &cp, + (xdrproc_t)xdr_gfs3_write_req); + if (ret) { + /* +@@ -4248,8 +4266,8 @@ client3_3_flush(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH, +- client3_3_flush_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_flush_req); ++ client3_3_flush_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_flush_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4291,8 +4309,8 @@ client3_3_fsync(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNC, +- client3_3_fsync_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fsync_req); ++ client3_3_fsync_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fsync_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4336,8 +4354,8 @@ client3_3_fstat(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSTAT, +- client3_3_fstat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fstat_req); ++ client3_3_fstat_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fstat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4391,8 +4409,8 @@ client3_3_opendir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client3_3_opendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_opendir_req); ++ client3_3_opendir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4435,8 +4453,8 @@ 
client3_3_fsyncdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR, +- client3_3_fsyncdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fsyncdir_req); ++ client3_3_fsyncdir_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fsyncdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4479,8 +4497,8 @@ client3_3_statfs(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STATFS, +- client3_3_statfs_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_statfs_req); ++ client3_3_statfs_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_statfs_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4523,8 +4541,8 @@ client3_3_setxattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETXATTR, +- client3_3_setxattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_setxattr_req); ++ client3_3_setxattr_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_setxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4570,8 +4588,7 @@ client3_3_fsetxattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FSETXATTR, client3_3_fsetxattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_fsetxattr_req); ++ NULL, (xdrproc_t)xdr_gfs3_fsetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4611,10 +4628,12 @@ client3_3_fgetxattr(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4654,9 +4673,12 @@ client3_3_fgetxattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, +- GFS3_OP_FGETXATTR, client3_3_fgetxattr_cbk, +- NULL, rsphdr, count, NULL, 0, local->iobref, ++ GFS3_OP_FGETXATTR, client3_3_fgetxattr_cbk, &cp, + (xdrproc_t)xdr_gfs3_fgetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4699,11 +4721,14 @@ client3_3_getxattr(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) { + op_errno = 0; + goto unwind; + } ++ ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + + local = mem_get0(this->local_pool); +@@ -4775,9 +4800,12 @@ client3_3_getxattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_GETXATTR, +- client3_3_getxattr_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_getxattr_cbk, &cp, + (xdrproc_t)xdr_gfs3_getxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4822,10 +4850,12 @@ client3_3_xattrop(call_frame_t *frame, xlator_t *this, 
void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + + if (!(args->loc && args->loc->inode)) +@@ -4871,9 +4901,12 @@ client3_3_xattrop(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_XATTROP, +- client3_3_xattrop_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_xattrop_cbk, &cp, + (xdrproc_t)xdr_gfs3_xattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4918,10 +4951,12 @@ client3_3_fxattrop(call_frame_t *frame, xlator_t *this, void *data) + struct iovec vector[MAX_IOVEC] = { + {0}, + }; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -4962,9 +4997,11 @@ client3_3_fxattrop(call_frame_t *frame, xlator_t *this, void *data) + rsp_iobuf = NULL; + rsp_iobref = NULL; + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FXATTROP, +- client3_3_fxattrop_cbk, NULL, rsphdr, count, +- NULL, 0, local->iobref, ++ client3_3_fxattrop_cbk, &cp, + (xdrproc_t)xdr_gfs3_fxattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5016,8 +5053,7 @@ client3_3_removexattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_REMOVEXATTR, client3_3_removexattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_removexattr_req); ++ NULL, (xdrproc_t)xdr_gfs3_removexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5059,10 +5095,9 @@ client3_3_fremovexattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } +- ret = client_submit_request(this, &req, frame, conf->fops, +- GFS3_OP_FREMOVEXATTR, +- client3_3_fremovexattr_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfs3_fremovexattr_req); ++ ret = client_submit_request( ++ this, &req, frame, conf->fops, GFS3_OP_FREMOVEXATTR, ++ client3_3_fremovexattr_cbk, NULL, (xdrproc_t)xdr_gfs3_fremovexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5104,8 +5139,8 @@ client3_3_lease(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LEASE, +- client3_3_lease_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_lease_req); ++ client3_3_lease_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_lease_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5167,7 +5202,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK, +- client3_3_lk_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client3_3_lk_cbk, NULL, + (xdrproc_t)xdr_gfs3_lk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5210,8 +5245,8 @@ client3_3_inodelk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, 
&req, frame, conf->fops, GFS3_OP_INODELK, +- client3_3_inodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_inodelk_req); ++ client3_3_inodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_inodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5260,8 +5295,8 @@ client3_3_finodelk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FINODELK, +- client3_3_finodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_finodelk_req); ++ client3_3_finodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_finodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5305,8 +5340,8 @@ client3_3_entrylk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ENTRYLK, +- client3_3_entrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_entrylk_req); ++ client3_3_entrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_entrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5349,8 +5384,8 @@ client3_3_fentrylk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FENTRYLK, +- client3_3_fentrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fentrylk_req); ++ client3_3_fentrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fentrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5391,8 +5426,7 @@ client3_3_rchecksum(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_RCHECKSUM, client3_3_rchecksum_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_rchecksum_req); ++ NULL, (xdrproc_t)xdr_gfs3_rchecksum_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5433,10 +5467,12 @@ client3_3_readdir(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + int readdir_rsp_size = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5486,9 +5522,11 @@ client3_3_readdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIR, +- client3_3_readdir_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client3_3_readdir_cbk, &cp, + (xdrproc_t)xdr_gfs3_readdir_req); + + if (ret) { +@@ -5534,10 +5572,12 @@ client3_3_readdirp(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + clnt_local_t *local = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5587,9 +5627,11 @@ client3_3_readdirp(call_frame_t *frame, xlator_t *this, void *data) + + local->fd = fd_ref(args->fd); + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIRP, +- client3_3_readdirp_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client3_3_readdirp_cbk, &cp, + (xdrproc_t)xdr_gfs3_readdirp_req); + if (ret) { + 
gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5637,8 +5679,8 @@ client3_3_setattr(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETATTR, +- client3_3_setattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_setattr_req); ++ client3_3_setattr_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_setattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5679,8 +5721,8 @@ client3_3_fsetattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSETATTR, +- client3_3_fsetattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_fsetattr_req); ++ client3_3_fsetattr_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_fsetattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5722,8 +5764,7 @@ client3_3_fallocate(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FALLOCATE, client3_3_fallocate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_fallocate_req); ++ NULL, (xdrproc_t)xdr_gfs3_fallocate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5764,8 +5805,8 @@ client3_3_discard(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_DISCARD, +- client3_3_discard_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_discard_req); ++ client3_3_discard_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_discard_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5807,8 +5848,8 @@ client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ZEROFILL, +- client3_3_zerofill_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_zerofill_req); ++ client3_3_zerofill_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_zerofill_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5850,7 +5891,7 @@ client3_3_ipc(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_IPC, +- client3_3_ipc_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client3_3_ipc_cbk, NULL, + (xdrproc_t)xdr_gfs3_ipc_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5895,8 +5936,8 @@ client3_3_seek(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SEEK, +- client3_3_seek_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfs3_seek_req); ++ client3_3_seek_cbk, NULL, ++ (xdrproc_t)xdr_gfs3_seek_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -6083,8 +6124,7 @@ client3_3_getactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_GETACTIVELK, client3_3_getactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_getactivelk_req); ++ NULL, (xdrproc_t)xdr_gfs3_getactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -6141,8 +6181,7 @@ client3_3_setactivelk(call_frame_t *frame, 
xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_SETACTIVELK, client3_3_setactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfs3_setactivelk_req); ++ NULL, (xdrproc_t)xdr_gfs3_setactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c +index 8f3ee41..2673b6e 100644 +--- a/xlators/protocol/client/src/client-rpc-fops_v2.c ++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c +@@ -3005,11 +3005,13 @@ client4_0_lookup(call_frame_t *frame, xlator_t *this, void *data) + struct iobref *rsp_iobref = NULL; + struct iobuf *rsp_iobuf = NULL; + struct iovec *rsphdr = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + + memset(vector, 0, sizeof(vector)); ++ memset(&cp, 0, sizeof(client_payload_t)); + + conf = this->private; + args = data; +@@ -3059,9 +3061,12 @@ client4_0_lookup(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } ++ ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LOOKUP, +- client4_0_lookup_cbk, NULL, rsphdr, count, NULL, +- 0, local->iobref, ++ client4_0_lookup_cbk, &cp, + (xdrproc_t)xdr_gfx_lookup_req); + + if (ret) { +@@ -3109,8 +3114,8 @@ client4_0_stat(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STAT, +- client4_0_stat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_stat_req); ++ client4_0_stat_cbk, NULL, ++ (xdrproc_t)xdr_gfx_stat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3153,8 +3158,8 @@ client4_0_truncate(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_TRUNCATE, +- client4_0_truncate_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_truncate_req); ++ client4_0_truncate_cbk, NULL, ++ (xdrproc_t)xdr_gfx_truncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3198,8 +3203,7 @@ client4_0_ftruncate(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FTRUNCATE, client4_0_ftruncate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_ftruncate_req); ++ NULL, (xdrproc_t)xdr_gfx_ftruncate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3241,8 +3245,8 @@ client4_0_access(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ACCESS, +- client4_0_access_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_access_req); ++ client4_0_access_cbk, NULL, ++ (xdrproc_t)xdr_gfx_access_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3295,8 +3299,8 @@ client4_0_readlink(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READLINK, +- client4_0_readlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_readlink_req); ++ client4_0_readlink_cbk, NULL, ++ 
(xdrproc_t)xdr_gfx_readlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3338,8 +3342,8 @@ client4_0_unlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_UNLINK, +- client4_0_unlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_unlink_req); ++ client4_0_unlink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_unlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3381,8 +3385,8 @@ client4_0_rmdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RMDIR, +- client4_0_rmdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_rmdir_req); ++ client4_0_rmdir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_rmdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3440,8 +3444,8 @@ client4_0_symlink(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SYMLINK, +- client4_0_symlink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_symlink_req); ++ client4_0_symlink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_symlink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3486,8 +3490,8 @@ client4_0_rename(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_RENAME, +- client4_0_rename_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_rename_req); ++ client4_0_rename_cbk, NULL, ++ (xdrproc_t)xdr_gfx_rename_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3546,8 +3550,8 @@ client4_0_link(call_frame_t *frame, xlator_t *this, void *data) + loc_path(&local->loc2, NULL); + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LINK, +- client4_0_link_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_link_req); ++ client4_0_link_cbk, NULL, ++ (xdrproc_t)xdr_gfx_link_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3601,8 +3605,8 @@ client4_0_mknod(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKNOD, +- client4_0_mknod_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_mknod_req); ++ client4_0_mknod_cbk, NULL, ++ (xdrproc_t)xdr_gfx_mknod_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3668,8 +3672,8 @@ client4_0_mkdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_MKDIR, +- client4_0_mkdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_mkdir_req); ++ client4_0_mkdir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_mkdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3726,8 +3730,8 @@ client4_0_create(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_CREATE, +- client4_0_create_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_create_req); ++ 
client4_0_create_cbk, NULL, ++ (xdrproc_t)xdr_gfx_create_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3787,8 +3791,8 @@ client4_0_open(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPEN, +- client4_0_open_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_open_req); ++ client4_0_open_cbk, NULL, ++ (xdrproc_t)xdr_gfx_open_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3823,10 +3827,12 @@ client4_0_readv(call_frame_t *frame, xlator_t *this, void *data) + }; + struct iobuf *rsp_iobuf = NULL; + struct iobref *rsp_iobref = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -3872,9 +3878,12 @@ client4_0_readv(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.rsp_payload = &rsp_vec; ++ cp.rsp_payload_cnt = 1; ++ cp.rsp_iobref = local->iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ, +- client4_0_readv_cbk, NULL, NULL, 0, &rsp_vec, 1, +- local->iobref, (xdrproc_t)xdr_gfx_read_req); ++ client4_0_readv_cbk, &cp, ++ (xdrproc_t)xdr_gfx_read_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -3905,10 +3914,12 @@ client4_0_writev(call_frame_t *frame, xlator_t *this, void *data) + }; + int op_errno = ESTALE; + int ret = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -3926,9 +3937,11 @@ client4_0_writev(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.iobref = args->iobref; ++ cp.payload = args->vector; ++ cp.payload_cnt = args->count; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE, +- client4_0_writev_cbk, args->iobref, +- args->vector, args->count, NULL, 0, NULL, ++ client4_0_writev_cbk, &cp, + (xdrproc_t)xdr_gfx_write_req); + if (ret) { + /* +@@ -3987,8 +4000,8 @@ client4_0_flush(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FLUSH, +- client4_0_flush_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_flush_req); ++ client4_0_flush_cbk, NULL, ++ (xdrproc_t)xdr_gfx_flush_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4030,8 +4043,8 @@ client4_0_fsync(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNC, +- client4_0_fsync_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fsync_req); ++ client4_0_fsync_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fsync_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4075,8 +4088,8 @@ client4_0_fstat(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSTAT, +- client4_0_fstat_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fstat_req); ++ client4_0_fstat_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fstat_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4130,8 +4143,8 @@ 
client4_0_opendir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_OPENDIR, +- client4_0_opendir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_opendir_req); ++ client4_0_opendir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_opendir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4175,8 +4188,8 @@ client4_0_fsyncdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSYNCDIR, +- client4_0_fsyncdir_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fsyncdir_req); ++ client4_0_fsyncdir_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fsyncdir_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4219,8 +4232,8 @@ client4_0_statfs(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_STATFS, +- client4_0_statfs_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_statfs_req); ++ client4_0_statfs_cbk, NULL, ++ (xdrproc_t)xdr_gfx_statfs_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4263,8 +4276,8 @@ client4_0_setxattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETXATTR, +- client4_0_setxattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_setxattr_req); ++ client4_0_setxattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_setxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4310,8 +4323,7 @@ client4_0_fsetxattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FSETXATTR, client4_0_fsetxattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_fsetxattr_req); ++ NULL, (xdrproc_t)xdr_gfx_fsetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4364,8 +4376,7 @@ client4_0_fgetxattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FGETXATTR, client4_0_fgetxattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_fgetxattr_req); ++ NULL, (xdrproc_t)xdr_gfx_fgetxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4452,8 +4463,8 @@ client4_0_getxattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_GETXATTR, +- client4_0_getxattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_getxattr_req); ++ client4_0_getxattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_getxattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4514,8 +4525,8 @@ client4_0_xattrop(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_XATTROP, +- client4_0_xattrop_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_xattrop_req); ++ client4_0_xattrop_cbk, NULL, ++ (xdrproc_t)xdr_gfx_xattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); 
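/* A reference sketch (illustrative only, not part of the patch) of the
 * calling convention this refactor introduces: the six payload-related
 * arguments of client_submit_request() collapse into a single
 * client_payload_t, defined later in this patch in client.h. A
 * pre-refactor call such as
 *
 *   client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ,
 *                         client4_0_readv_cbk, NULL, NULL, 0, &rsp_vec, 1,
 *                         local->iobref, (xdrproc_t)xdr_gfx_read_req);
 *
 * becomes
 *
 *   client_payload_t cp;
 *   memset(&cp, 0, sizeof(client_payload_t));
 *   cp.rsp_payload = &rsp_vec;        // was the 10th positional argument
 *   cp.rsp_payload_cnt = 1;           // was the 11th
 *   cp.rsp_iobref = local->iobref;    // was the 12th
 *   client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READ,
 *                         client4_0_readv_cbk, &cp,
 *                         (xdrproc_t)xdr_gfx_read_req);
 *
 * Fops that carry no payload, the majority of the hunks here, simply pass
 * NULL for cp, and client_submit_request() then invokes rpc_clnt_submit()
 * with every payload slot empty, as the client.c hunk below shows. */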
+@@ -4566,8 +4577,8 @@ client4_0_fxattrop(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FXATTROP, +- client4_0_fxattrop_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fxattrop_req); ++ client4_0_fxattrop_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fxattrop_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4613,8 +4624,7 @@ client4_0_removexattr(call_frame_t *frame, xlator_t *this, void *data) + } + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_REMOVEXATTR, client4_0_removexattr_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_removexattr_req); ++ NULL, (xdrproc_t)xdr_gfx_removexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4656,10 +4666,9 @@ client4_0_fremovexattr(call_frame_t *frame, xlator_t *this, void *data) + op_errno = -ret; + goto unwind; + } +- ret = client_submit_request(this, &req, frame, conf->fops, +- GFS3_OP_FREMOVEXATTR, +- client4_0_fremovexattr_cbk, NULL, NULL, 0, NULL, +- 0, NULL, (xdrproc_t)xdr_gfx_fremovexattr_req); ++ ret = client_submit_request( ++ this, &req, frame, conf->fops, GFS3_OP_FREMOVEXATTR, ++ client4_0_fremovexattr_cbk, NULL, (xdrproc_t)xdr_gfx_fremovexattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4701,8 +4710,8 @@ client4_0_lease(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LEASE, +- client4_0_lease_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_lease_req); ++ client4_0_lease_cbk, NULL, ++ (xdrproc_t)xdr_gfx_lease_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4764,7 +4773,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_LK, +- client4_0_lk_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client4_0_lk_cbk, NULL, + (xdrproc_t)xdr_gfx_lk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -4807,8 +4816,8 @@ client4_0_inodelk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_INODELK, +- client4_0_inodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_inodelk_req); ++ client4_0_inodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_inodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4857,8 +4866,8 @@ client4_0_finodelk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FINODELK, +- client4_0_finodelk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_finodelk_req); ++ client4_0_finodelk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_finodelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4902,8 +4911,8 @@ client4_0_entrylk(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ENTRYLK, +- client4_0_entrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_entrylk_req); ++ client4_0_entrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_entrylk_req); + if (ret) { + gf_msg(this->name, 
GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4946,8 +4955,8 @@ client4_0_fentrylk(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FENTRYLK, +- client4_0_fentrylk_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fentrylk_req); ++ client4_0_fentrylk_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fentrylk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -4988,10 +4997,12 @@ client4_0_readdir(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + int readdir_rsp_size = 0; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5041,9 +5052,11 @@ client4_0_readdir(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIR, +- client4_0_readdir_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client4_0_readdir_cbk, &cp, + (xdrproc_t)xdr_gfx_readdir_req); + + if (ret) { +@@ -5089,10 +5102,12 @@ client4_0_readdirp(call_frame_t *frame, xlator_t *this, void *data) + {0}, + }; + clnt_local_t *local = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5142,9 +5157,11 @@ client4_0_readdirp(call_frame_t *frame, xlator_t *this, void *data) + + local->fd = fd_ref(args->fd); + ++ cp.rsphdr = rsphdr; ++ cp.rsphdr_cnt = count; ++ cp.rsp_iobref = rsp_iobref; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_READDIRP, +- client4_0_readdirp_cbk, NULL, rsphdr, count, +- NULL, 0, rsp_iobref, ++ client4_0_readdirp_cbk, &cp, + (xdrproc_t)xdr_gfx_readdirp_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5192,8 +5209,8 @@ client4_0_setattr(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SETATTR, +- client4_0_setattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_setattr_req); ++ client4_0_setattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_setattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5235,8 +5252,7 @@ client4_0_fallocate(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_FALLOCATE, client4_0_fallocate_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_fallocate_req); ++ NULL, (xdrproc_t)xdr_gfx_fallocate_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5277,8 +5293,8 @@ client4_0_discard(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_DISCARD, +- client4_0_discard_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_discard_req); ++ client4_0_discard_cbk, NULL, ++ (xdrproc_t)xdr_gfx_discard_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5320,8 +5336,8 @@ client4_0_zerofill(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ZEROFILL, +- client4_0_zerofill_cbk, NULL, 
NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_zerofill_req); ++ client4_0_zerofill_cbk, NULL, ++ (xdrproc_t)xdr_gfx_zerofill_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5363,7 +5379,7 @@ client4_0_ipc(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_IPC, +- client4_0_ipc_cbk, NULL, NULL, 0, NULL, 0, NULL, ++ client4_0_ipc_cbk, NULL, + (xdrproc_t)xdr_gfx_ipc_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, +@@ -5408,8 +5424,8 @@ client4_0_seek(call_frame_t *frame, xlator_t *this, void *data) + } + + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_SEEK, +- client4_0_seek_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_seek_req); ++ client4_0_seek_cbk, NULL, ++ (xdrproc_t)xdr_gfx_seek_req); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5458,8 +5474,7 @@ client4_0_getactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_GETACTIVELK, client4_0_getactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_getactivelk_req); ++ NULL, (xdrproc_t)xdr_gfx_getactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5514,8 +5529,7 @@ client4_0_setactivelk(call_frame_t *frame, xlator_t *this, void *data) + + ret = client_submit_request(this, &req, frame, conf->fops, + GFS3_OP_SETACTIVELK, client4_0_setactivelk_cbk, +- NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_setactivelk_req); ++ NULL, (xdrproc_t)xdr_gfx_setactivelk_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5785,8 +5799,8 @@ client4_0_namelink(call_frame_t *frame, xlator_t *this, void *data) + + dict_to_xdr(args->xdata, &req.xdata); + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_NAMELINK, +- client4_namelink_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_namelink_req); ++ client4_namelink_cbk, NULL, ++ (xdrproc_t)xdr_gfx_namelink_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -5837,8 +5851,8 @@ client4_0_icreate(call_frame_t *frame, xlator_t *this, void *data) + op_errno = ESTALE; + dict_to_xdr(args->xdata, &req.xdata); + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_ICREATE, +- client4_icreate_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_icreate_req); ++ client4_icreate_cbk, NULL, ++ (xdrproc_t)xdr_gfx_icreate_req); + if (ret) + goto free_reqdata; + GF_FREE(req.xdata.pairs.pairs_val); +@@ -5864,10 +5878,12 @@ client4_0_put(call_frame_t *frame, xlator_t *this, void *data) + int op_errno = ESTALE; + int ret = 0; + clnt_local_t *local = NULL; ++ client_payload_t cp; + + if (!frame || !this || !data) + goto unwind; + ++ memset(&cp, 0, sizeof(client_payload_t)); + args = data; + conf = this->private; + +@@ -5890,9 +5906,11 @@ client4_0_put(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ++ cp.iobref = args->iobref; ++ cp.payload = args->vector; ++ cp.payload_cnt = args->count; + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_PUT, +- client4_0_put_cbk, args->iobref, args->vector, +- args->count, NULL, 0, NULL, ++ client4_0_put_cbk, &cp, + (xdrproc_t)xdr_gfx_put_req); + if (ret) { + /* 
+@@ -5959,10 +5977,10 @@ client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void *data) + local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this, + req.fd_out); + +- ret = client_submit_request( +- this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE, +- client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL, +- (xdrproc_t)xdr_gfx_copy_file_range_req); ++ ret = client_submit_request(this, &req, frame, conf->fops, ++ GFS3_OP_COPY_FILE_RANGE, ++ client4_0_copy_file_range_cbk, NULL, ++ (xdrproc_t)xdr_gfx_copy_file_range_req); + if (ret) { + /* + * If the lower layers fail to submit a request, they'll also +@@ -6009,8 +6027,8 @@ client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data) + goto unwind; + } + ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_FSETATTR, +- client4_0_fsetattr_cbk, NULL, NULL, 0, NULL, 0, +- NULL, (xdrproc_t)xdr_gfx_fsetattr_req); ++ client4_0_fsetattr_cbk, NULL, ++ (xdrproc_t)xdr_gfx_fsetattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +@@ -6054,9 +6072,9 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data) + + dict_to_xdr(args->xdata, &req.xdata); + +- ret = client_submit_request( +- this, &req, frame, conf->fops, GFS3_OP_RCHECKSUM, client4_rchecksum_cbk, +- NULL, NULL, 0, NULL, 0, NULL, (xdrproc_t)xdr_gfx_rchecksum_req); ++ ret = client_submit_request(this, &req, frame, conf->fops, ++ GFS3_OP_RCHECKSUM, client4_rchecksum_cbk, NULL, ++ (xdrproc_t)xdr_gfx_rchecksum_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index dea6c28..2d75714 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -131,10 +131,7 @@ client_type_to_gf_type(short l_type) + int + client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn, +- struct iobref *iobref, struct iovec *payload, +- int payloadcnt, struct iovec *rsp_payload, +- int rsp_payload_count, struct iobref *rsp_iobref, +- xdrproc_t xdrproc) ++ client_payload_t *cp, xdrproc_t xdrproc) + { + int ret = -1; + clnt_conf_t *conf = NULL; +@@ -180,8 +177,8 @@ client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + goto out; + } + +- if (iobref != NULL) { +- ret = iobref_merge(new_iobref, iobref); ++ if (cp && cp->iobref != NULL) { ++ ret = iobref_merge(new_iobref, cp->iobref); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_NO_MEMORY, + "cannot merge " +@@ -224,9 +221,16 @@ client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + } + + /* Send the msg */ +- ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, payload, +- payloadcnt, new_iobref, frame, payload, payloadcnt, +- rsp_payload, rsp_payload_count, rsp_iobref); ++ if (cp) { ++ ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, ++ cp->payload, cp->payload_cnt, new_iobref, frame, ++ cp->rsphdr, cp->rsphdr_cnt, cp->rsp_payload, ++ cp->rsp_payload_cnt, cp->rsp_iobref); ++ } else { ++ ret = rpc_clnt_submit(conf->rpc, prog, procnum, cbkfn, &iov, count, ++ NULL, 0, new_iobref, frame, NULL, 0, NULL, 0, ++ NULL); ++ } + + if (ret < 0) { + gf_msg_debug(this->name, 0, "rpc_clnt_submit failed"); +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index 
71f84f3..f12fa61 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -345,6 +345,17 @@ typedef struct client_args { + lock_migration_info_t *locklist; + } clnt_args_t; + ++typedef struct client_payload { ++ struct iobref *iobref; ++ struct iovec *payload; ++ struct iovec *rsphdr; ++ struct iovec *rsp_payload; ++ struct iobref *rsp_iobref; ++ int payload_cnt; ++ int rsphdr_cnt; ++ int rsp_payload_cnt; ++} client_payload_t; ++ + typedef ssize_t (*gfs_serialize_t)(struct iovec outmsg, void *args); + + clnt_fd_ctx_t * +@@ -359,10 +370,7 @@ client_local_wipe(clnt_local_t *local); + int + client_submit_request(xlator_t *this, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbk, +- struct iobref *iobref, struct iovec *rsphdr, +- int rsphdr_count, struct iovec *rsp_payload, +- int rsp_count, struct iobref *rsp_iobref, +- xdrproc_t xdrproc); ++ client_payload_t *cp, xdrproc_t xdrproc); + + int + client_submit_compound_request(xlator_t *this, void *req, call_frame_t *frame, +-- +1.8.3.1 + diff --git a/SOURCES/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch b/SOURCES/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch new file mode 100644 index 0000000..9f5f3bf --- /dev/null +++ b/SOURCES/0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch @@ -0,0 +1,115 @@ +From 2449a1824c6f7b57889335caaeb09f4c5cb3efce Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Thu, 28 Mar 2019 14:59:00 +0530 +Subject: [PATCH 48/52] gfapi: Unblock epoll thread for upcall processing + +With commit#ad35193,we have made changes to offload +processing upcall notifications to synctask so as not +to block epoll threads. However seems like the issue wasnt +fully addressed. + +In "glfs_cbk_upcall_data" -> "synctask_new1" after creating synctask +if there is no callback defined, the thread waits on synctask_join +till the syncfn is finished. So that way even with those changes, +epoll threads are blocked till the upcalls are processed. + +Hence the right fix now is to define a callback function for that +synctask "glfs_cbk_upcall_syncop" so as to unblock epoll/notify threads +completely and the upcall processing can happen in parallel by synctask +threads. 
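The synctask semantics this fix hinges on, sketched below for reference (a minimal illustration built from the calls visible in the hunks that follow; example_fn, example_cbk, stack_args, args and mem_type are placeholder names, not symbols from the patch):

/* Completion callback: invoked when the task finishes; this is where the
 * heap-allocated args are released, mirroring glfs_upcall_syncop_cbk()
 * in the hunk below. */
static int
example_cbk(int ret, call_frame_t *frame, void *opaque)
{
    GF_FREE(opaque);
    return 0;
}

/* Old, effectively-blocking form: with a NULL callback, synctask_new()
 * waits in synctask_join() until the syncfn returns, so the epoll/notify
 * thread that delivered the upcall stalls for the whole upcall. */
ret = synctask_new(THIS->ctx->env, example_fn, NULL, NULL, &stack_args);

/* Fire-and-forget form adopted by this patch: heap-allocate the args
 * (stack storage would not survive the caller returning) and pass the
 * callback, so synctask_new() returns without joining. */
args = GF_CALLOC(1, sizeof(*args), mem_type);
ret = synctask_new(THIS->ctx->env, example_fn, example_cbk, NULL, args);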
+ +Upstream references- +mainline : https://review.gluster.org/22436 +release-6.0 : https://review.gluster.org/22459 + +Change-Id: I4d8645e3588fab2c3ca534e0112773aaab68a5dd +fixes: bz#1694565 +Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166586 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + api/src/glfs-fops.c | 42 ++++++++++++++++++++++++++++++++++-------- + 1 file changed, 34 insertions(+), 8 deletions(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 88cd32b..01ba60b 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -5714,6 +5714,16 @@ out: + } + + static int ++glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque) ++{ ++ struct upcall_syncop_args *args = opaque; ++ ++ GF_FREE(args->upcall_data); ++ GF_FREE(args); ++ return 0; ++} ++ ++static int + glfs_cbk_upcall_syncop(void *opaque) + { + struct upcall_syncop_args *args = opaque; +@@ -5770,15 +5780,13 @@ out: + GLFS_FREE(up_arg); + } + +- return ret; ++ return 0; + } + + static void + glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + { +- struct upcall_syncop_args args = { +- 0, +- }; ++ struct upcall_syncop_args *args = NULL; + int ret = -1; + + if (!fs || !upcall_data) +@@ -5789,16 +5797,34 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + goto out; + } + +- args.fs = fs; +- args.upcall_data = upcall_data; ++ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args), ++ glfs_mt_upcall_entry_t); ++ if (!args) { ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, ++ "Upcall syncop args allocation failed."); ++ goto out; ++ } ++ ++ /* Note: we are not taking any ref on fs here. ++ * Ideally applications have to unregister for upcall events ++ * or stop polling for upcall events before performing ++ * glfs_fini. And as for outstanding synctasks created, we wait ++ * for all syncenv threads to finish tasks before cleaning up the ++ * fs->ctx. Hence it seems safe to process these callback ++ * notification without taking any lock/ref. ++ */ ++ args->fs = fs; ++ args->upcall_data = gf_memdup(upcall_data, sizeof(*upcall_data)); + +- ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, NULL, NULL, +- &args); ++ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, ++ glfs_upcall_syncop_cbk, NULL, args); + /* should we retry incase of failure? 
*/ + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED, + "Synctak for Upcall event_type(%d) and gfid(%s) failed", + upcall_data->event_type, (char *)(upcall_data->gfid)); ++ GF_FREE(args->upcall_data); ++ GF_FREE(args); + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0049-transport-socket-log-shutdown-msg-occasionally.patch b/SOURCES/0049-transport-socket-log-shutdown-msg-occasionally.patch new file mode 100644 index 0000000..7cee050 --- /dev/null +++ b/SOURCES/0049-transport-socket-log-shutdown-msg-occasionally.patch @@ -0,0 +1,49 @@ +From e205516ef874d617e4756856098bf10e17b0ea3d Mon Sep 17 00:00:00 2001 +From: Raghavendra G <rgowdapp@redhat.com> +Date: Fri, 22 Mar 2019 10:40:45 +0530 +Subject: [PATCH 49/52] transport/socket: log shutdown msg occasionally + +>Change-Id: If3fc0884e7e2f45de2d278b98693b7a473220a5f +>Signed-off-by: Raghavendra G <rgowdapp@redhat.com> +>Fixes: bz#1691616 + +BUG: 1691620 +Change-Id: If3fc0884e7e2f45de2d278b98693b7a473220a5f +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167088 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 4 ++-- + rpc/rpc-transport/socket/src/socket.h | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 121d46b..f6de1d3 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -807,8 +807,8 @@ __socket_shutdown(rpc_transport_t *this) + gf_log(this->name, GF_LOG_DEBUG, "shutdown() returned %d. %s", ret, + strerror(errno)); + } else { +- gf_log(this->name, GF_LOG_INFO, "intentional socket shutdown(%d)", +- priv->sock); ++ GF_LOG_OCCASIONALLY(priv->shutdown_log_ctr, this->name, GF_LOG_INFO, ++ "intentional socket shutdown(%d)", priv->sock); + } + + return ret; +diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h +index 32339d3..e1ccae2 100644 +--- a/rpc/rpc-transport/socket/src/socket.h ++++ b/rpc/rpc-transport/socket/src/socket.h +@@ -219,6 +219,7 @@ typedef struct { + int keepalivecnt; + int timeout; + int log_ctr; ++ int shutdown_log_ctr; + /* ssl_error_required is used only during the SSL connection setup + * phase. 
+ * It holds the error code returned by SSL_get_error() and is used to +-- +1.8.3.1 + diff --git a/SOURCES/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch b/SOURCES/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch new file mode 100644 index 0000000..f7c0f65 --- /dev/null +++ b/SOURCES/0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch @@ -0,0 +1,142 @@ +From 161a039f8088bf8ce7000d8175e3793219525179 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Thu, 28 Mar 2019 07:17:16 -0400 +Subject: [PATCH 50/52] geo-rep: Fix syncing multiple rename of symlink + +Problem: +Geo-rep fails to sync rename of symlink if it's +renamed multiple times if creation and rename +happened successively + +Worker crash at slave: +Traceback (most recent call last): + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", in worker + res = getattr(self.obj, rmeth)(*in_data[2:]) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", in entry_ops + [ESTALE, EINVAL, EBUSY]) + File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", in errno_wrap + return call(*arg) + File "/usr/libexec/glusterfs/python/syncdaemon/libcxattr.py", in lsetxattr + cls.raise_oserr() + File "/usr/libexec/glusterfs/python/syncdaemon/libcxattr.py", in raise_oserr + raise OSError(errn, os.strerror(errn)) +OSError: [Errno 12] Cannot allocate memory + +Geo-rep Behaviour: +1. SYMLINK doesn't record target path in changelog. + So while syncing SYMLINK, readlink is done on + master to get target path. + +2. Geo-rep will create destination if source is not + present while syncing RENAME. Hence while syncing + RENAME of SYMLINK, target path is collected from + destination. + +Cause: +If symlink is created and renamed multiple times, creation of +symlink is ignored, as it's no longer present on master at +that path. While symlink is renamed multiple times at master, +when syncing first RENAME of SYMLINK, both source and destination +is not present, hence target path is not known. In this case, +while creating destination directly at slave, regular file +attributes were encoded into blob instead of symlink, +causing failure in gfid-access translator while decoding +blob. + +Solution: +While syncing of RENAME of SYMLINK, when target is not known +and when src and destination is not present on the master, +don't create destination. Ignore the rename. It's ok to ignore. +If it's unliked, it's fine. If it's renamed to something else, +it will be synced then. + +Backport of: +> Patch: https://review.gluster.org/22438 +> Change-Id: Ibdfa495513b7c05b5370ab0b89c69a6802338d87 +> BUG: 1693648 +> Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: Ibdfa495513b7c05b5370ab0b89c69a6802338d87 +fixes: bz#1670429 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167122 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/resource.py | 23 ++++++++++++++--------- + tests/00-geo-rep/georep-basic-dr-rsync.t | 1 + + tests/geo-rep.rc | 12 ++++++++++++ + 3 files changed, 27 insertions(+), 9 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index a2d0b16..c290d86 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -625,15 +625,20 @@ class Server(object): + # exist with different gfid. 
+ if not matching_disk_gfid(gfid, entry): + if e['stat'] and not stat.S_ISDIR(e['stat']['mode']): +- if stat.S_ISLNK(e['stat']['mode']) and \ +- e['link'] is not None: +- st1 = lstat(en) +- if isinstance(st1, int): +- (pg, bname) = entry2pb(en) +- blob = entry_pack_symlink(cls, gfid, bname, +- e['link'], e['stat']) +- elif not matching_disk_gfid(gfid, en): +- collect_failure(e, EEXIST, uid, gid, True) ++ if stat.S_ISLNK(e['stat']['mode']): ++ # src is not present, so don't sync symlink as ++ # we don't know target. It's ok to ignore. If ++ # it's unliked, it's fine. If it's renamed to ++ # something else, it will be synced then. ++ if e['link'] is not None: ++ st1 = lstat(en) ++ if isinstance(st1, int): ++ (pg, bname) = entry2pb(en) ++ blob = entry_pack_symlink(cls, gfid, bname, ++ e['link'], ++ e['stat']) ++ elif not matching_disk_gfid(gfid, en): ++ collect_failure(e, EEXIST, uid, gid, True) + else: + slink = os.path.join(pfx, gfid) + st = lstat(slink) +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index 4a03930..8b64370 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -110,6 +110,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1 + #Check History Crawl. + TEST $GEOREP_CLI $master $slave stop + TEST create_data "history" ++TEST create_rename_symlink_case + TEST $GEOREP_CLI $master $slave start + EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active" + EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive" +diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc +index 396b4c4..d723129 100644 +--- a/tests/geo-rep.rc ++++ b/tests/geo-rep.rc +@@ -19,6 +19,18 @@ function check_common_secret_file() + echo $? + } + ++function create_rename_symlink_case() ++{ ++ mkdir ${mastermnt}/MUL_REN_SYMLINK ++ cd ${mastermnt}/MUL_REN_SYMLINK ++ mkdir sym_dir1 ++ ln -s "sym_dir1" sym1 ++ mv sym1 sym2 ++ mv sym2 sym3 ++ mv sym3 sym4 ++ cd - ++} ++ + function create_data() + { + prefix=$1 +-- +1.8.3.1 + diff --git a/SOURCES/0051-spec-update-rpm-install-condition.patch b/SOURCES/0051-spec-update-rpm-install-condition.patch new file mode 100644 index 0000000..8d5ce47 --- /dev/null +++ b/SOURCES/0051-spec-update-rpm-install-condition.patch @@ -0,0 +1,67 @@ +From 71f4d55770287288f39b31a0435916ac3d9f742b Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Fri, 5 Apr 2019 22:27:52 +0530 +Subject: [PATCH 51/52] spec: update rpm install condition + +Update code to allow rpm install without gluster process shutdown. + +Label: DOWNSTREAM ONLY + +BUG: 1493284 +Change-Id: I308e7e4629a2428927a6df34536e3cd645a54f8c +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167089 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Milind Changire <mchangir@redhat.com> +--- + glusterfs.spec.in | 34 ---------------------------------- + 1 file changed, 34 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 7c7f7c0..0d57b49 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1629,40 +1629,6 @@ if not (ok == 0) then + end + + +-%pretrans devel -p <lua> +-if not posix.access("/bin/bash", "x") then +- -- initial installation, no shell, no running glusterfsd +- return 0 +-end +- +--- TODO: move this completely to a lua script +--- For now, we write a temporary bash script and execute that. 
+- +-script = [[#!/bin/sh +-pidof -c -o %PPID -x glusterfsd &>/dev/null +- +-if [ $? -eq 0 ]; then +- pushd . > /dev/null 2>&1 +- for volume in /var/lib/glusterd/vols/*; do cd $volume; +- vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` +- volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` +- if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then +- exit 1; +- fi +- done +- +- popd > /dev/null 2>&1 +- exit 1; +-fi +-]] +- +-ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) +-end +- +- +- + %pretrans fuse -p <lua> + if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd +-- +1.8.3.1 + diff --git a/SOURCES/0052-geo-rep-IPv6-support.patch b/SOURCES/0052-geo-rep-IPv6-support.patch new file mode 100644 index 0000000..12c6e1b --- /dev/null +++ b/SOURCES/0052-geo-rep-IPv6-support.patch @@ -0,0 +1,299 @@ +From d7bb933742f4d9135621590bf13713633c549af1 Mon Sep 17 00:00:00 2001 +From: Aravinda VK <avishwan@redhat.com> +Date: Thu, 14 Mar 2019 20:06:54 +0530 +Subject: [PATCH 52/52] geo-rep: IPv6 support + +`address_family=inet6` needs to be added while mounting master and +slave volumes in gverify script. + +New option introduced to gluster cli(`--inet6`) which will be used +internally by geo-rep while calling `gluster volume info +--remote-host=<ipv6>`. + +Backport of https://review.gluster.org/22363 + +Fixes: bz#1688231 +Change-Id: I1e0d42cae07158df043e64a2f991882d8c897837 +Signed-off-by: Aravinda VK <avishwan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167120 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli.c | 11 ++++++++++ + cli/src/cli.h | 1 + + geo-replication/src/gverify.sh | 22 ++++++++++++++++---- + geo-replication/syncdaemon/argsupgrade.py | 13 +++++++++++- + geo-replication/syncdaemon/gsyncd.py | 1 + + geo-replication/syncdaemon/subcmds.py | 9 +++++++-- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 30 ++++++++++++++++++++++++++-- + 7 files changed, 78 insertions(+), 9 deletions(-) + +diff --git a/cli/src/cli.c b/cli/src/cli.c +index 08f117e..c33d152 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -433,6 +433,12 @@ cli_opt_parse(char *opt, struct cli_state *state) + return 0; + } + ++ oarg = strtail(opt, "inet6"); ++ if (oarg) { ++ state->address_family = "inet6"; ++ return 0; ++ } ++ + oarg = strtail(opt, "log-file="); + if (oarg) { + state->log_file = oarg; +@@ -679,6 +685,11 @@ cli_rpc_init(struct cli_state *state) + this = THIS; + cli_rpc_prog = &cli_prog; + ++ /* If address family specified in CLI */ ++ if (state->address_family) { ++ addr_family = state->address_family; ++ } ++ + /* Connect to glusterd using the specified method, giving preference + * to a unix socket connection. If nothing is specified, connect to + * the default glusterd socket. 
+diff --git a/cli/src/cli.h b/cli/src/cli.h +index 5df86d5..b79a0a2 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -136,6 +136,7 @@ struct cli_state { + gf_loglevel_t log_level; + + char *glusterd_sock; ++ char *address_family; + }; + + struct cli_local { +diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh +index d048de0..7c88f9f 100755 +--- a/geo-replication/src/gverify.sh ++++ b/geo-replication/src/gverify.sh +@@ -94,6 +94,7 @@ echo $cmd_line; + function master_stats() + { + MASTERVOL=$1; ++ local inet6=$2; + local d; + local i; + local disk_size; +@@ -102,7 +103,12 @@ function master_stats() + local m_status; + + d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null); +- glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d; ++ if [ "$inet6" = "inet6" ]; then ++ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d; ++ else ++ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d; ++ fi ++ + i=$(get_inode_num $d); + if [[ "$i" -ne "1" ]]; then + echo 0:0; +@@ -124,12 +130,18 @@ function slave_stats() + SLAVEUSER=$1; + SLAVEHOST=$2; + SLAVEVOL=$3; ++ local inet6=$4; + local cmd_line; + local ver; + local status; + + d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null); +- glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; ++ if [ "$inet6" = "inet6" ]; then ++ glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; ++ else ++ glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d; ++ fi ++ + i=$(get_inode_num $d); + if [[ "$i" -ne "1" ]]; then + echo 0:0; +@@ -167,6 +179,8 @@ function main() + log_file=$6 + > $log_file + ++ inet6=$7 ++ + # Use FORCE_BLOCKER flag in the error message to differentiate + # between the errors which the force command should bypass + +@@ -204,8 +218,8 @@ function main() + fi; + + ERRORS=0; +- master_data=$(master_stats $1); +- slave_data=$(slave_stats $2 $3 $4); ++ master_data=$(master_stats $1 ${inet6}); ++ slave_data=$(slave_stats $2 $3 $4 ${inet6}); + master_disk_size=$(echo $master_data | cut -f1 -d':'); + slave_disk_size=$(echo $slave_data | cut -f1 -d':'); + master_used_size=$(echo $master_data | cut -f2 -d':'); +diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py +index 4018143..7af4063 100644 +--- a/geo-replication/syncdaemon/argsupgrade.py ++++ b/geo-replication/syncdaemon/argsupgrade.py +@@ -84,6 +84,10 @@ def upgrade(): + # fail when it does stat to check the existence. 
+ init_gsyncd_template_conf() + ++ inet6 = False ++ if "--inet6" in sys.argv: ++ inet6 = True ++ + if "--monitor" in sys.argv: + # python gsyncd.py --path=/bricks/b1 + # --monitor -c gsyncd.conf +@@ -147,8 +151,11 @@ def upgrade(): + + user, hname = remote_addr.split("@") + ++ if not inet6: ++ hname = gethostbyname(hname) ++ + print(("ssh://%s@%s:gluster://127.0.0.1:%s" % ( +- user, gethostbyname(hname), vol))) ++ user, hname, vol))) + + sys.exit(0) + elif "--normalize-url" in sys.argv: +@@ -346,3 +353,7 @@ def upgrade(): + + if pargs.reset_sync_time: + sys.argv.append("--reset-sync-time") ++ ++ if inet6: ++ # Add `--inet6` as first argument ++ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:] +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 037f351..effe0ce 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -47,6 +47,7 @@ def main(): + sys.exit(0) + + parser = ArgumentParser() ++ parser.add_argument("--inet6", action="store_true") + sp = parser.add_subparsers(dest="subcmd") + + # Monitor Status File update +diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py +index 30050ec..4ece7e0 100644 +--- a/geo-replication/syncdaemon/subcmds.py ++++ b/geo-replication/syncdaemon/subcmds.py +@@ -110,8 +110,13 @@ def subcmd_voluuidget(args): + + ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError + +- po = Popen(['gluster', '--xml', '--remote-host=' + args.host, +- 'volume', 'info', args.volname], bufsize=0, ++ cmd = ['gluster', '--xml', '--remote-host=' + args.host, ++ 'volume', 'info', args.volname] ++ ++ if args.inet6: ++ cmd.append("--inet6") ++ ++ po = Popen(cmd, bufsize=0, + stdin=None, stdout=PIPE, stderr=PIPE, + universal_newlines=True) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index 49baa58..0f40bea 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -76,6 +76,19 @@ static char *gsync_reserved_opts[] = {"gluster-command", + static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance", + "log-rsync-performance", NULL}; + ++void ++set_gsyncd_inet6_arg(runner_t *runner) ++{ ++ xlator_t *this = NULL; ++ char *af; ++ int ret; ++ ++ this = THIS; ++ ret = dict_get_str(this->options, "transport.address-family", &af); ++ if (ret == 0) ++ runner_argprintf(runner, "--%s", af); ++} ++ + int + __glusterd_handle_sys_exec(rpcsvc_request_t *req) + { +@@ -384,6 +397,7 @@ glusterd_urltransform_init(runner_t *runner, const char *transname) + { + runinit(runner); + runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd"); ++ set_gsyncd_inet6_arg(runner); + runner_argprintf(runner, "--%s-url", transname); + } + +@@ -725,6 +739,7 @@ glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid) + + runinit(&runner); + runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd"); ++ set_gsyncd_inet6_arg(&runner); + runner_add_arg(&runner, "--slavevoluuid-get"); + runner_argprintf(&runner, "%s::%s", slave_host, slave_vol); + +@@ -788,6 +803,7 @@ glusterd_gsync_get_config(char *master, char *slave, char *conf_path, + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); ++ set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, 
"--config-get-all", NULL); +@@ -917,6 +933,7 @@ glusterd_gsync_get_status(char *master, char *slave, char *conf_path, + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); ++ set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--status-get", NULL); +@@ -937,6 +954,7 @@ glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master, + runinit(&runner); + runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL); + runner_argprintf(&runner, "%s", conf_path); ++ set_gsyncd_inet6_arg(&runner); + runner_argprintf(&runner, "--iprefix=%s", DATADIR); + runner_argprintf(&runner, ":%s", master); + runner_add_args(&runner, slave, "--config-get", NULL); +@@ -2811,6 +2829,7 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol, + char *slave_ip = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; ++ char *af = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -2852,9 +2871,16 @@ glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol, + runner_argprintf(&runner, "%s", slave_vol); + runner_argprintf(&runner, "%d", ssh_port); + runner_argprintf(&runner, "%s", log_file_path); +- gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s", ++ ret = dict_get_str(this->options, "transport.address-family", &af); ++ if (ret) ++ af = "-"; ++ ++ runner_argprintf(&runner, "%s", af); ++ ++ gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s", + runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3], +- runner.argv[4], runner.argv[5], runner.argv[6]); ++ runner.argv[4], runner.argv[5], runner.argv[6], ++ runner.argv[7]); + runner_redir(&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock(&priv->big_lock); + ret = runner_run(&runner); +-- +1.8.3.1 + diff --git a/SOURCES/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch b/SOURCES/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch new file mode 100644 index 0000000..337370d --- /dev/null +++ b/SOURCES/0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch @@ -0,0 +1,575 @@ +From 1fb89973551937f34f24b45e07072a6ce6c30ff9 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 16 Oct 2017 14:18:31 +0530 +Subject: [PATCH 053/124] Revert "packaging: (ganesha) remove glusterfs-ganesha + subpackage and related files)" + +This reverts commit 0cf2963f12a8b540a7042605d8c79f638fdf6cee. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: Id6e7585021bd4dd78a59580cfa4838bdd4e539a0 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167102 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + configure.ac | 3 + + extras/Makefile.am | 3 +- + extras/ganesha/Makefile.am | 2 + + extras/ganesha/config/Makefile.am | 4 + + extras/ganesha/config/ganesha-ha.conf.sample | 19 ++++ + extras/ganesha/scripts/Makefile.am | 4 + + extras/ganesha/scripts/create-export-ganesha.sh | 91 +++++++++++++++ + extras/ganesha/scripts/dbus-send.sh | 60 ++++++++++ + extras/ganesha/scripts/generate-epoch.py | 48 ++++++++ + extras/hook-scripts/start/post/Makefile.am | 2 +- + extras/hook-scripts/start/post/S31ganesha-start.sh | 122 +++++++++++++++++++++ + glusterfs.spec.in | 44 +++++++- + 12 files changed, 396 insertions(+), 6 deletions(-) + create mode 100644 extras/ganesha/Makefile.am + create mode 100644 extras/ganesha/config/Makefile.am + create mode 100644 extras/ganesha/config/ganesha-ha.conf.sample + create mode 100644 extras/ganesha/scripts/Makefile.am + create mode 100755 extras/ganesha/scripts/create-export-ganesha.sh + create mode 100755 extras/ganesha/scripts/dbus-send.sh + create mode 100755 extras/ganesha/scripts/generate-epoch.py + create mode 100755 extras/hook-scripts/start/post/S31ganesha-start.sh + +diff --git a/configure.ac b/configure.ac +index 0d06f5a..125ae29 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -196,6 +196,9 @@ AC_CONFIG_FILES([Makefile + extras/init.d/glustereventsd-Debian + extras/init.d/glustereventsd-Redhat + extras/init.d/glustereventsd-FreeBSD ++ extras/ganesha/Makefile ++ extras/ganesha/config/Makefile ++ extras/ganesha/scripts/Makefile + extras/systemd/Makefile + extras/systemd/glusterd.service + extras/systemd/glustereventsd.service +diff --git a/extras/Makefile.am b/extras/Makefile.am +index ff5ca9b..983f014 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -11,7 +11,8 @@ EditorModedir = $(docdir) + EditorMode_DATA = glusterfs-mode.el glusterfs.vim + + SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \ +- $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python ++ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \ ++ ganesha + + confdir = $(sysconfdir)/glusterfs + if WITH_SERVER +diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am +new file mode 100644 +index 0000000..542de68 +--- /dev/null ++++ b/extras/ganesha/Makefile.am +@@ -0,0 +1,2 @@ ++SUBDIRS = scripts config ++CLEANFILES = +diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am +new file mode 100644 +index 0000000..c729273 +--- /dev/null ++++ b/extras/ganesha/config/Makefile.am +@@ -0,0 +1,4 @@ ++EXTRA_DIST= ganesha-ha.conf.sample ++ ++confdir = $(sysconfdir)/ganesha ++conf_DATA = ganesha-ha.conf.sample +diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample +new file mode 100644 +index 0000000..c22892b +--- /dev/null ++++ b/extras/ganesha/config/ganesha-ha.conf.sample +@@ -0,0 +1,19 @@ ++# Name of the HA cluster created. ++# must be unique within the subnet ++HA_NAME="ganesha-ha-360" ++# ++# N.B. you may use short names or long names; you may not use IP addrs. ++# Once you select one, stay with it as it will be mildly unpleasant to ++# clean up if you switch later on. 
Ensure that all names - short and/or
++# long - are in DNS or /etc/hosts on all machines in the cluster.
++#
++# The subset of nodes of the Gluster Trusted Pool that form the ganesha
++# HA cluster. Hostname is specified.
++HA_CLUSTER_NODES="server1,server2,..."
++#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..."
++#
++# Virtual IPs for each of the nodes specified above.
++VIP_server1="10.0.2.1"
++VIP_server2="10.0.2.2"
++#VIP_server1_lab_redhat_com="10.0.2.1"
++#VIP_server2_lab_redhat_com="10.0.2.2"
+diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
+new file mode 100644
+index 0000000..00a2c45
+--- /dev/null
++++ b/extras/ganesha/scripts/Makefile.am
+@@ -0,0 +1,4 @@
++EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh
++
++scriptsdir = $(libexecdir)/ganesha
++scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py
+diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh
+new file mode 100755
+index 0000000..1ffba42
+--- /dev/null
++++ b/extras/ganesha/scripts/create-export-ganesha.sh
+@@ -0,0 +1,91 @@
++#!/bin/bash
++
++#This script is called by glusterd when the user
++#tries to export a volume via NFS-Ganesha.
++#An export file specific to a volume
++#is created in GANESHA_DIR/exports.
++
++# Try loading the config from any of the distro
++# specific configuration locations
++if [ -f /etc/sysconfig/ganesha ]
++ then
++ . /etc/sysconfig/ganesha
++fi
++if [ -f /etc/conf.d/ganesha ]
++ then
++ . /etc/conf.d/ganesha
++fi
++if [ -f /etc/default/ganesha ]
++ then
++ . /etc/default/ganesha
++fi
++
++GANESHA_DIR=${1%/}
++OPTION=$2
++VOL=$3
++CONF=$GANESHA_DIR"/ganesha.conf"
++declare -i EXPORT_ID
++
++function check_cmd_status()
++{
++ if [ "$1" != "0" ]
++ then
++ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
++ sed -i /$VOL.conf/d $CONF
++ exit 1
++ fi
++}
++
++
++if [ ! -d "$GANESHA_DIR/exports" ];
++ then
++ mkdir $GANESHA_DIR/exports
++ check_cmd_status `echo $?`
++fi
++
++function write_conf()
++{
++echo -e "# WARNING : Using Gluster CLI will overwrite manual
++# changes made to this file. To avoid it, edit the
++# file and run ganesha-ha.sh --refresh-config."
++
++echo "EXPORT{"
++echo " Export_Id = 2;"
++echo " Path = \"/$VOL\";"
++echo " FSAL {"
++echo " name = \"GLUSTER\";"
++echo " hostname=\"localhost\";"
++echo " volume=\"$VOL\";"
++echo " }"
++echo " Access_type = RW;"
++echo " Disable_ACL = true;"
++echo ' Squash="No_root_squash";'
++echo " Pseudo=\"/$VOL\";"
++echo ' Protocols = "3", "4" ;'
++echo ' Transports = "UDP","TCP";'
++echo ' SecType = "sys";'
++echo " }"
++}
++if [ "$OPTION" = "on" ];
++then
++ if !
(cat $CONF | grep $VOL.conf\"$ ) ++ then ++ write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf ++ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF ++ count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l` ++ if [ "$count" = "1" ] ; then ++ EXPORT_ID=2 ++ else ++ EXPORT_ID=`cat $GANESHA_DIR/.export_added` ++ check_cmd_status `echo $?` ++ EXPORT_ID=EXPORT_ID+1 ++ sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \ ++ $GANESHA_DIR/exports/export.$VOL.conf ++ check_cmd_status `echo $?` ++ fi ++ echo $EXPORT_ID > $GANESHA_DIR/.export_added ++ fi ++else ++ rm -rf $GANESHA_DIR/exports/export.$VOL.conf ++ sed -i /$VOL.conf/d $CONF ++fi +diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh +new file mode 100755 +index 0000000..ec8d948 +--- /dev/null ++++ b/extras/ganesha/scripts/dbus-send.sh +@@ -0,0 +1,60 @@ ++#!/bin/bash ++ ++# Try loading the config from any of the distro ++# specific configuration locations ++if [ -f /etc/sysconfig/ganesha ] ++ then ++ . /etc/sysconfig/ganesha ++fi ++if [ -f /etc/conf.d/ganesha ] ++ then ++ . /etc/conf.d/ganesha ++fi ++if [ -f /etc/default/ganesha ] ++ then ++ . /etc/default/ganesha ++fi ++ ++GANESHA_DIR=${1%/} ++OPTION=$2 ++VOL=$3 ++CONF=$GANESHA_DIR"/ganesha.conf" ++ ++function check_cmd_status() ++{ ++ if [ "$1" != "0" ] ++ then ++ logger "dynamic export failed on node :${hostname -s}" ++ fi ++} ++ ++#This function keeps track of export IDs and increments it with every new entry ++function dynamic_export_add() ++{ ++ dbus-send --system \ ++--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \ ++string:"EXPORT(Path=/$VOL)" ++ check_cmd_status `echo $?` ++} ++ ++#This function removes an export dynamically(uses the export_id of the export) ++function dynamic_export_remove() ++{ ++ removed_id=`cat $GANESHA_DIR/exports/export.$VOL.conf |\ ++grep Export_Id | awk -F"[=,;]" '{print$2}'| tr -d '[[:space:]]'` ++ dbus-send --print-reply --system \ ++--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id ++ check_cmd_status `echo $?` ++} ++ ++if [ "$OPTION" = "on" ]; ++then ++ dynamic_export_add $@ ++fi ++ ++if [ "$OPTION" = "off" ]; ++then ++ dynamic_export_remove $@ ++fi +diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py +new file mode 100755 +index 0000000..5db5e56 +--- /dev/null ++++ b/extras/ganesha/scripts/generate-epoch.py +@@ -0,0 +1,48 @@ ++#!/usr/bin/python ++# ++# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> ++# This file is part of GlusterFS. ++# ++# This file is licensed to you under your choice of the GNU Lesser ++# General Public License, version 3 or any later version (LGPLv3 or ++# later), or the GNU General Public License, version 2 (GPLv2), in all ++# cases as published by the Free Software Foundation. ++# ++# Generates unique epoch value on each gluster node to be used by ++# nfs-ganesha service on that node. ++# ++# Configure 'EPOCH_EXEC' option to this script path in ++# '/etc/sysconfig/ganesha' file used by nfs-ganesha service. 
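Note: a Python 3 rendering of the epoch construction this script
performs (the script itself is Python 2). Reading the first four UUID
bytes big-endian is an assumption about the intent of the original
`int(uuid_bin.encode('hex'), 32)` expression, and the UUID in the
usage line is a made-up example:

    import time
    import uuid

    def ganesha_epoch(glusterd_uuid):
        # Upper 32 bits: current local time; lower 32 bits: the leading
        # bits of the node's glusterd UUID, masked with 0xFFFF0000 as in
        # the script, so each node produces a distinct, growing epoch.
        now = int(time.mktime(time.localtime())) << 32
        head = int.from_bytes(uuid.UUID(glusterd_uuid).bytes[:4], "big")
        return now | (head & 0xFFFF0000)

    print(ganesha_epoch("8c7340ae-53ad-45c0-a8a5-c05a31b6e25b"))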
++# ++# Construct epoch as follows - ++# first 32-bit contains the now() time ++# rest 32-bit value contains the local glusterd node uuid ++ ++import time ++import binascii ++ ++# Calculate the now() time into a 64-bit integer value ++def epoch_now(): ++ epoch_time = int(time.mktime(time.localtime())) << 32 ++ return epoch_time ++ ++# Read glusterd UUID and extract first 32-bit of it ++def epoch_uuid(): ++ file_name = '/var/lib/glusterd/glusterd.info' ++ ++ for line in open(file_name): ++ if "UUID" in line: ++ glusterd_uuid = line.split('=')[1].strip() ++ ++ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-","")) ++ ++ epoch_uuid = int(uuid_bin.encode('hex'), 32) & 0xFFFF0000 ++ return epoch_uuid ++ ++# Construct epoch as follows - ++# first 32-bit contains the now() time ++# rest 32-bit value contains the local glusterd node uuid ++epoch = (epoch_now() | epoch_uuid()) ++print str(epoch) ++ ++exit(0) +diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am +index e32546d..792019d 100644 +--- a/extras/hook-scripts/start/post/Makefile.am ++++ b/extras/hook-scripts/start/post/Makefile.am +@@ -1,4 +1,4 @@ +-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh ++EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/ + if WITH_SERVER +diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh +new file mode 100755 +index 0000000..90ba6bc +--- /dev/null ++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh +@@ -0,0 +1,122 @@ ++#!/bin/bash ++PROGNAME="Sganesha-start" ++OPTSPEC="volname:,gd-workdir:" ++VOL= ++declare -i EXPORT_ID ++ganesha_key="ganesha.enable" ++GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha" ++CONF1="$GANESHA_DIR/ganesha.conf" ++GLUSTERD_WORKDIR= ++ ++function parse_args () ++{ ++ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@) ++ eval set -- "$ARGS" ++ ++ while true; do ++ case $1 in ++ --volname) ++ shift ++ VOL=$1 ++ ;; ++ --gd-workdir) ++ shift ++ GLUSTERD_WORKDIR=$1 ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac ++ shift ++ done ++} ++ ++ ++ ++#This function generates a new export entry as export.volume_name.conf ++function write_conf() ++{ ++echo -e "# WARNING : Using Gluster CLI will overwrite manual ++# changes made to this file. To avoid it, edit the ++# file, copy it over to all the NFS-Ganesha nodes ++# and run ganesha-ha.sh --refresh-config." 
++ ++echo "EXPORT{" ++echo " Export_Id = 2;" ++echo " Path = \"/$VOL\";" ++echo " FSAL {" ++echo " name = \"GLUSTER\";" ++echo " hostname=\"localhost\";" ++echo " volume=\"$VOL\";" ++echo " }" ++echo " Access_type = RW;" ++echo " Disable_ACL = true;" ++echo " Squash=\"No_root_squash\";" ++echo " Pseudo=\"/$VOL\";" ++echo " Protocols = \"3\", \"4\" ;" ++echo " Transports = \"UDP\",\"TCP\";" ++echo " SecType = \"sys\";" ++echo "}" ++} ++ ++#It adds the export dynamically by sending dbus signals ++function export_add() ++{ ++ dbus-send --print-reply --system --dest=org.ganesha.nfsd \ ++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ ++string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)" ++ ++} ++ ++# based on src/scripts/ganeshactl/Ganesha/export_mgr.py ++function is_exported() ++{ ++ local volume="${1}" ++ ++ dbus-send --type=method_call --print-reply --system \ ++ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++ org.ganesha.nfsd.exportmgr.ShowExports \ ++ | grep -w -q "/${volume}" ++ ++ return $? ++} ++ ++# Check the info file (contains the volume options) to see if Ganesha is ++# enabled for this volume. ++function ganesha_enabled() ++{ ++ local volume="${1}" ++ local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info" ++ local enabled="off" ++ ++ enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2) ++ ++ [ "${enabled}" == "on" ] ++ ++ return $? ++} ++ ++parse_args $@ ++ ++if ganesha_enabled ${VOL} && ! is_exported ${VOL} ++then ++ if [ ! -e ${GANESHA_DIR}/exports/export.${VOL}.conf ] ++ then ++ #Remove export entry from nfs-ganesha.conf ++ sed -i /$VOL.conf/d $CONF1 ++ write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf ++ EXPORT_ID=`cat $GANESHA_DIR/.export_added` ++ EXPORT_ID=EXPORT_ID+1 ++ echo $EXPORT_ID > $GANESHA_DIR/.export_added ++ sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \ ++ $GANESHA_DIR/exports/export.$VOL.conf ++ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1 ++ else ++ EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\ ++ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') ++ fi ++ export_add $VOL ++fi ++ ++exit 0 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 0d57b49..dd7438c 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -299,7 +299,6 @@ Obsoletes: hekafs + Obsoletes: %{name}-common < %{version}-%{release} + Obsoletes: %{name}-core < %{version}-%{release} + Obsoletes: %{name}-ufo +-Obsoletes: %{name}-ganesha + %if ( 0%{!?_with_gnfs:1} ) + Obsoletes: %{name}-gnfs + %endif +@@ -455,6 +454,30 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. + ++%if ( 0%{!?_without_server:1} ) ++%package ganesha ++Summary: NFS-Ganesha configuration ++Group: Applications/File ++ ++Requires: %{name}-server%{?_isa} = %{version}-%{release} ++Requires: nfs-ganesha-gluster, pcs, dbus ++%if ( 0%{?rhel} && 0%{?rhel} == 6 ) ++Requires: cman, pacemaker, corosync ++%endif ++ ++%description ganesha ++GlusterFS is a distributed file-system capable of scaling to several ++petabytes. It aggregates various storage bricks over Infiniband RDMA ++or TCP/IP interconnect into one large parallel network file ++system. GlusterFS is one of the most sophisticated file systems in ++terms of features and extensibility. It borrows a powerful concept ++called Translators from GNU Hurd kernel. Much of the code in GlusterFS ++is in user space and easily manageable. 
++ ++This package provides the configuration and related files for using ++NFS-Ganesha as the NFS server using GlusterFS ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + %package geo-replication + Summary: GlusterFS Geo-replication +@@ -1111,6 +1134,12 @@ exit 0 + %endif + %endif + ++%if ( 0%{?_without_server:1} ) ++#exclude ganesha related files ++%exclude %{_sysconfdir}/ganesha/* ++%exclude %{_libexecdir}/ganesha/* ++%endif ++ + %files api + %exclude %{_libdir}/*.so + # libgfapi files +@@ -1273,6 +1302,12 @@ exit 0 + %exclude %{_datadir}/glusterfs/tests/vagrant + %endif + ++%if ( 0%{!?_without_server:1} ) ++%files ganesha ++%{_sysconfdir}/ganesha/* ++%{_libexecdir}/ganesha/* ++%endif ++ + %if ( 0%{!?_without_ocf:1} ) + %files resource-agents + # /usr/lib is the standard for OCF, also on x86_64 +@@ -1396,6 +1431,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post +@@ -1868,6 +1904,9 @@ fi + %endif + + %changelog ++* Fri Apr 5 2019 Jiffin Tony Thottan <jthottan@redhat.com> ++- Adding ganesha bits back in gluster repository ++ + * Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com> + - remove unneeded ldconfig in scriptlets + - reported by Igor Gnatenko in Fedora +@@ -1960,9 +1999,6 @@ fi + * Thu Feb 16 2017 Niels de Vos <ndevos@redhat.com> + - Obsolete and Provide python-gluster for upgrading from glusterfs < 3.10 + +-* Tue Feb 7 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com> +-- remove ganesha (#1418417) +- + * Wed Feb 1 2017 Poornima G <pgurusid@redhat.com> + - Install /var/lib/glusterd/groups/metadata-cache by default + +-- +1.8.3.1 + diff --git a/SOURCES/0054-Revert-glusterd-storhaug-remove-ganesha.patch b/SOURCES/0054-Revert-glusterd-storhaug-remove-ganesha.patch new file mode 100644 index 0000000..261856c --- /dev/null +++ b/SOURCES/0054-Revert-glusterd-storhaug-remove-ganesha.patch @@ -0,0 +1,1912 @@ +From 1029c27982d2f91cb2d3c4fcc19aa5171111dfb9 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 16 Oct 2017 14:24:29 +0530 +Subject: [PATCH 054/124] Revert "glusterd: (storhaug) remove ganesha" + +This reverts commit 843e1b04b554ab887ec656ae7b468bb93ee4e2f7. 
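Note on the ganesha packaging patch above: both dbus-send.sh and the
S31ganesha-start hook export a volume on a live nfs-ganesha by calling
AddExport on its D-Bus export manager. A sketch of the equivalent call
via subprocess; GANESHA_DIR is the path the hook hardcodes, and the
function name and export_id parameter are illustrative:

    import subprocess

    GANESHA_DIR = "/var/run/gluster/shared_storage/nfs-ganesha"

    def dbus_export_add(volname, export_id):
        # Same D-Bus method the hook script invokes, so a newly started
        # volume is exported without restarting nfs-ganesha.
        conf = "%s/exports/export.%s.conf" % (GANESHA_DIR, volname)
        subprocess.check_call([
            "dbus-send", "--print-reply", "--system",
            "--dest=org.ganesha.nfsd", "/org/ganesha/nfsd/ExportMgr",
            "org.ganesha.nfsd.exportmgr.AddExport",
            "string:" + conf,
            "string:EXPORT(Export_Id=%d)" % export_id,
        ])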
+ +Label: DOWNSTREAM ONLY + +Change-Id: I06b5450344c33f26da3d94b6f67051d41dfbba17 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167103 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-global.c | 57 ++ + cli/src/cli-cmd-parser.c | 122 ++- + cli/src/cli-cmd.c | 3 +- + cli/src/cli-rpc-ops.c | 82 ++ + cli/src/cli.h | 4 + + xlators/mgmt/glusterd/src/Makefile.am | 4 +- + xlators/mgmt/glusterd/src/glusterd-errno.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 915 +++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-handler.c | 79 ++ + xlators/mgmt/glusterd/src/glusterd-messages.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 45 +- + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 196 +++++ + xlators/mgmt/glusterd/src/glusterd-store.h | 2 + + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 37 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 + + xlators/mgmt/glusterd/src/glusterd.h | 22 + + 16 files changed, 1568 insertions(+), 11 deletions(-) + create mode 100644 xlators/mgmt/glusterd/src/glusterd-ganesha.c + +diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c +index d0729ac..270b76f 100644 +--- a/cli/src/cli-cmd-global.c ++++ b/cli/src/cli-cmd-global.c +@@ -36,6 +36,10 @@ int + cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word, + const char **words, int wordcount); + ++int ++cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word, ++ const char **words, int wordcount); ++ + struct cli_cmd global_cmds[] = { + { + "global help", +@@ -48,6 +52,11 @@ struct cli_cmd global_cmds[] = { + cli_cmd_get_state_cbk, + "Get local state representation of mentioned daemon", + }, ++ { ++ "nfs-ganesha {enable| disable} ", ++ cli_cmd_ganesha_cbk, ++ "Enable/disable NFS-Ganesha support", ++ }, + {NULL, NULL, NULL}}; + + int +@@ -89,6 +98,54 @@ out: + } + + int ++cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word, ++ const char **words, int wordcount) ++ ++{ ++ int sent = 0; ++ int parse_error = 0; ++ int ret = -1; ++ rpc_clnt_procedure_t *proc = NULL; ++ call_frame_t *frame = NULL; ++ dict_t *options = NULL; ++ cli_local_t *local = NULL; ++ char *op_errstr = NULL; ++ ++ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GANESHA]; ++ ++ frame = create_frame(THIS, THIS->ctx->pool); ++ if (!frame) ++ goto out; ++ ++ ret = cli_cmd_ganesha_parse(state, words, wordcount, &options, &op_errstr); ++ if (ret) { ++ if (op_errstr) { ++ cli_err("%s", op_errstr); ++ GF_FREE(op_errstr); ++ } else ++ cli_usage_out(word->pattern); ++ parse_error = 1; ++ goto out; ++ } ++ ++ CLI_LOCAL_INIT(local, words, frame, options); ++ ++ if (proc->fn) { ++ ret = proc->fn(frame, THIS, options); ++ } ++ ++out: ++ if (ret) { ++ cli_cmd_sent_status_get(&sent); ++ if ((sent == 0) && (parse_error == 0)) ++ cli_out("Setting global option failed"); ++ } ++ ++ CLI_STACK_DESTROY(frame); ++ return ret; ++} ++ ++int + cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word, + const char **words, int wordcount) + { +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index d9ccba1..cd9c445 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1694,7 +1694,7 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + } + } + +- if ((strcmp (key, "cluster.brick-multiplex") == 0)) { ++ if 
((strcmp(key, "cluster.brick-multiplex") == 0)) { + question = + "Brick-multiplexing is supported only for " + "OCS converged or independent mode. Also it is " +@@ -1703,11 +1703,12 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + "are running before this option is modified." + "Do you still want to continue?"; + +- answer = cli_cmd_get_confirmation (state, question); ++ answer = cli_cmd_get_confirmation(state, question); + if (GF_ANSWER_NO == answer) { +- gf_log ("cli", GF_LOG_ERROR, "Operation " +- "cancelled, exiting"); +- *op_errstr = gf_strdup ("Aborted by user."); ++ gf_log("cli", GF_LOG_ERROR, ++ "Operation " ++ "cancelled, exiting"); ++ *op_errstr = gf_strdup("Aborted by user."); + ret = -1; + goto out; + } +@@ -5848,3 +5849,114 @@ out: + + return ret; + } ++ ++/* Parsing global option for NFS-Ganesha config ++ * gluster nfs-ganesha enable/disable */ ++ ++int32_t ++cli_cmd_ganesha_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **op_errstr) ++{ ++ dict_t *dict = NULL; ++ int ret = -1; ++ char *key = NULL; ++ char *value = NULL; ++ char *w = NULL; ++ char *opwords[] = {"enable", "disable", NULL}; ++ const char *question = NULL; ++ gf_answer_t answer = GF_ANSWER_NO; ++ ++ GF_ASSERT(words); ++ GF_ASSERT(options); ++ ++ dict = dict_new(); ++ ++ if (!dict) ++ goto out; ++ ++ if (wordcount != 2) ++ goto out; ++ ++ key = (char *)words[0]; ++ value = (char *)words[1]; ++ ++ if (!key || !value) { ++ cli_out("Usage : nfs-ganesha <enable/disable>"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = gf_strip_whitespace(value, strlen(value)); ++ if (ret == -1) ++ goto out; ++ ++ if (strcmp(key, "nfs-ganesha")) { ++ gf_asprintf(op_errstr, ++ "Global option: error: ' %s '" ++ "is not a valid global option.", ++ key); ++ ret = -1; ++ goto out; ++ } ++ ++ w = str_getunamb(value, opwords); ++ if (!w) { ++ cli_out( ++ "Invalid global option \n" ++ "Usage : nfs-ganesha <enable/disable>"); ++ ret = -1; ++ goto out; ++ } ++ ++ question = ++ "Enabling NFS-Ganesha requires Gluster-NFS to be" ++ " disabled across the trusted pool. Do you " ++ "still want to continue?\n"; ++ ++ if (strcmp(value, "enable") == 0) { ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Global operation " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } ++ cli_out("This will take a few minutes to complete. 
Please wait .."); ++ ++ ret = dict_set_str(dict, "key", key); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, "dict set on key failed"); ++ goto out; ++ } ++ ++ ret = dict_set_str(dict, "value", value); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, "dict set on value failed"); ++ goto out; ++ } ++ ++ ret = dict_set_str(dict, "globalname", "All"); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, ++ "dict set on global" ++ " key failed."); ++ goto out; ++ } ++ ++ ret = dict_set_int32(dict, "hold_global_locks", _gf_true); ++ if (ret) { ++ gf_log(THIS->name, GF_LOG_ERROR, ++ "dict set on global key " ++ "failed."); ++ goto out; ++ } ++ ++ *options = dict; ++out: ++ if (ret) ++ dict_unref(dict); ++ ++ return ret; ++} +diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c +index 2ee8b1b..8c06905 100644 +--- a/cli/src/cli-cmd.c ++++ b/cli/src/cli-cmd.c +@@ -366,7 +366,8 @@ cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame, + unsigned timeout = 0; + + if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) || +- (GLUSTER_CLI_HEAL_VOLUME == procnum)) ++ (GLUSTER_CLI_HEAL_VOLUME == procnum) || ++ (GLUSTER_CLI_GANESHA == procnum)) + timeout = cli_ten_minutes_timeout; + else + timeout = cli_default_conn_timeout; +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 12e7fcc..736cd18 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -2207,6 +2207,62 @@ out: + return ret; + } + ++int ++gf_cli_ganesha_cbk(struct rpc_req *req, struct iovec *iov, int count, ++ void *myframe) ++{ ++ gf_cli_rsp rsp = { ++ 0, ++ }; ++ int ret = -1; ++ dict_t *dict = NULL; ++ ++ GF_ASSERT(myframe); ++ ++ if (-1 == req->rpc_status) { ++ goto out; ++ } ++ ++ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Failed to decode xdr response"); ++ goto out; ++ } ++ ++ gf_log("cli", GF_LOG_DEBUG, "Received resp to ganesha"); ++ ++ dict = dict_new(); ++ ++ if (!dict) { ++ ret = -1; ++ goto out; ++ } ++ ++ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict); ++ if (ret) ++ goto out; ++ ++ if (rsp.op_ret) { ++ if (strcmp(rsp.op_errstr, "")) ++ cli_err("nfs-ganesha: failed: %s", rsp.op_errstr); ++ else ++ cli_err("nfs-ganesha: failed"); ++ } ++ ++ else { ++ cli_out("nfs-ganesha : success "); ++ } ++ ++ ret = rsp.op_ret; ++ ++out: ++ if (dict) ++ dict_unref(dict); ++ cli_cmd_broadcast_response(ret); ++ return ret; ++} ++ + char * + is_server_debug_xlator(void *myframe) + { +@@ -4880,6 +4936,31 @@ out: + } + + int32_t ++gf_cli_ganesha(call_frame_t *frame, xlator_t *this, void *data) ++{ ++ gf_cli_req req = {{ ++ 0, ++ }}; ++ int ret = 0; ++ dict_t *dict = NULL; ++ ++ if (!frame || !this || !data) { ++ ret = -1; ++ goto out; ++ } ++ ++ dict = data; ++ ++ ret = cli_to_glusterd(&req, frame, gf_cli_ganesha_cbk, ++ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_GANESHA, ++ this, cli_rpc_prog, NULL); ++out: ++ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret); ++ ++ return ret; ++} ++ ++int32_t + gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data) + { + gf_cli_req req = {{ +@@ -12214,6 +12295,7 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = { + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec}, + [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot}, + [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume}, ++ [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha}, + [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt}, + 
[GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot}, + [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier}, +diff --git a/cli/src/cli.h b/cli/src/cli.h +index b79a0a2..37e4d9d 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -282,6 +282,10 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + int wordcount, dict_t **options, char **op_errstr); + + int32_t ++cli_cmd_ganesha_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **op_errstr); ++ ++int32_t + cli_cmd_get_state_parse(struct cli_state *state, const char **words, + int wordcount, dict_t **options, char **op_errstr); + +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index c8dd8e3..5fe5156 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ b/xlators/mgmt/glusterd/src/Makefile.am +@@ -10,7 +10,7 @@ glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL) + glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \ + glusterd-store.c glusterd-handshake.c glusterd-pmap.c \ +- glusterd-volgen.c glusterd-rebalance.c \ ++ glusterd-volgen.c glusterd-rebalance.c glusterd-ganesha.c \ + glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \ + glusterd-replace-brick.c glusterd-log-ops.c glusterd-tier.c \ + glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ +@@ -52,6 +52,8 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \ + -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \ + -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \ ++ -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \ ++ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \ + -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) + + +diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h +index 7e1575b..c74070e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-errno.h ++++ b/xlators/mgmt/glusterd/src/glusterd-errno.h +@@ -27,7 +27,7 @@ enum glusterd_op_errno { + EG_ISSNAP = 30813, /* Volume is a snap volume */ + EG_GEOREPRUN = 30814, /* Geo-Replication is running */ + EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */ +- EG_NOGANESHA = 30816, /* obsolete ganesha is not enabled */ ++ EG_NOGANESHA = 30816, /* Global ganesha is not enabled */ + }; + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +new file mode 100644 +index 0000000..fac16e6 +--- /dev/null ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -0,0 +1,915 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#include <glusterfs/common-utils.h> ++#include "glusterd.h" ++#include "glusterd-op-sm.h" ++#include "glusterd-store.h" ++#include "glusterd-utils.h" ++#include "glusterd-nfs-svc.h" ++#include "glusterd-volgen.h" ++#include "glusterd-messages.h" ++#include <glusterfs/syscall.h> ++ ++#include <ctype.h> ++ ++int ++start_ganesha(char **op_errstr); ++ ++typedef struct service_command { ++ char *binary; ++ char *service; ++ int (*action)(struct service_command *, char *); ++} service_command; ++ ++/* parsing_ganesha_ha_conf will allocate the returned string ++ * to be freed (GF_FREE) by the caller ++ * return NULL if error or not found */ ++static char * ++parsing_ganesha_ha_conf(const char *key) ++{ ++#define MAX_LINE 1024 ++ char scratch[MAX_LINE * 2] = { ++ 0, ++ }; ++ char *value = NULL, *pointer = NULL, *end_pointer = NULL; ++ FILE *fp; ++ ++ fp = fopen(GANESHA_HA_CONF, "r"); ++ if (fp == NULL) { ++ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, ++ "couldn't open the file %s", GANESHA_HA_CONF); ++ goto end_ret; ++ } ++ while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) { ++ /* Read config file until we get matching "^[[:space:]]*key" */ ++ if (*pointer == '#') { ++ continue; ++ } ++ while (isblank(*pointer)) { ++ pointer++; ++ } ++ if (strncmp(pointer, key, strlen(key))) { ++ continue; ++ } ++ pointer += strlen(key); ++ /* key found : if we fail to parse, we'll return an error ++ * rather than trying next one ++ * - supposition : conf file is bash compatible : no space ++ * around the '=' */ ++ if (*pointer != '=') { ++ gf_msg(THIS->name, GF_LOG_ERROR, errno, ++ GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s", ++ GANESHA_HA_CONF, key); ++ goto end_close; ++ } ++ pointer++; /* jump the '=' */ ++ ++ if (*pointer == '"' || *pointer == '\'') { ++ /* dont get the quote */ ++ pointer++; ++ } ++ end_pointer = pointer; ++ /* stop at the next closing quote or blank/newline */ ++ do { ++ end_pointer++; ++ } while (!(*end_pointer == '\'' || *end_pointer == '"' || ++ isspace(*end_pointer) || *end_pointer == '\0')); ++ *end_pointer = '\0'; ++ ++ /* got it. 
copy it and return */ ++ value = gf_strdup(pointer); ++ break; ++ } ++ ++end_close: ++ fclose(fp); ++end_ret: ++ return value; ++} ++ ++static int ++sc_systemctl_action(struct service_command *sc, char *command) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ ++ runinit(&runner); ++ runner_add_args(&runner, sc->binary, command, sc->service, NULL); ++ return runner_run(&runner); ++} ++ ++static int ++sc_service_action(struct service_command *sc, char *command) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ ++ runinit(&runner); ++ runner_add_args(&runner, sc->binary, sc->service, command, NULL); ++ return runner_run(&runner); ++} ++ ++static int ++manage_service(char *action) ++{ ++ struct stat stbuf = { ++ 0, ++ }; ++ int i = 0; ++ int ret = 0; ++ struct service_command sc_list[] = {{.binary = "/usr/bin/systemctl", ++ .service = "nfs-ganesha", ++ .action = sc_systemctl_action}, ++ {.binary = "/sbin/invoke-rc.d", ++ .service = "nfs-ganesha", ++ .action = sc_service_action}, ++ {.binary = "/sbin/service", ++ .service = "nfs-ganesha", ++ .action = sc_service_action}, ++ {.binary = NULL}}; ++ ++ while (sc_list[i].binary != NULL) { ++ ret = sys_stat(sc_list[i].binary, &stbuf); ++ if (ret == 0) { ++ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary); ++ if (strcmp(sc_list[i].binary, "/usr/bin/systemctl") == 0) ++ ret = sc_systemctl_action(&sc_list[i], action); ++ else ++ ret = sc_service_action(&sc_list[i], action); ++ ++ return ret; ++ } ++ i++; ++ } ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR, ++ "Could not %s NFS-Ganesha.Service manager for distro" ++ " not recognized.", ++ action); ++ return ret; ++} ++ ++/* ++ * Check if the cluster is a ganesha cluster or not * ++ */ ++gf_boolean_t ++glusterd_is_ganesha_cluster() ++{ ++ int ret = -1; ++ glusterd_conf_t *priv = NULL; ++ xlator_t *this = NULL; ++ gf_boolean_t ret_bool = _gf_false; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("ganesha", this, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, ++ _gf_false); ++ if (ret == _gf_true) { ++ ret_bool = _gf_true; ++ gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster"); ++ } else ++ gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster"); ++ ++out: ++ return ret_bool; ++} ++ ++/* Check if ganesha.enable is set to 'on', that checks if ++ * a particular volume is exported via NFS-Ganesha */ ++gf_boolean_t ++glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo) ++{ ++ char *value = NULL; ++ gf_boolean_t is_exported = _gf_false; ++ int ret = 0; ++ ++ ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value); ++ if ((ret == 0) && value) { ++ if (strcmp(value, "on") == 0) { ++ gf_msg_debug(THIS->name, 0, ++ "ganesha.enable set" ++ " to %s", ++ value); ++ is_exported = _gf_true; ++ } ++ } ++ return is_exported; ++} ++ ++/* * ++ * The below function is called as part of commit phase for volume set option ++ * "ganesha.enable". If the value is "on", it creates export configuration file ++ * and then export the volume via dbus command. 
Incase of "off", the volume ++ * will be already unexported during stage phase, so it will remove the conf ++ * file from shared storage ++ */ ++int ++glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict) ++{ ++ int ret = 0; ++ char *volname = NULL; ++ ++ GF_ASSERT(key); ++ GF_ASSERT(value); ++ GF_ASSERT(dict); ++ ++ if ((strcmp(key, "ganesha.enable") == 0)) { ++ if ((strcmp(value, "on")) && (strcmp(value, "off"))) { ++ gf_asprintf(errstr, ++ "Invalid value" ++ " for volume set command. Use on/off only."); ++ ret = -1; ++ goto out; ++ } ++ if (strcmp(value, "on") == 0) { ++ ret = glusterd_handle_ganesha_op(dict, errstr, key, value); ++ ++ } else if (is_origin_glusterd(dict)) { ++ ret = dict_get_str(dict, "volname", &volname); ++ if (ret) { ++ gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno, ++ GD_MSG_DICT_GET_FAILED, "Unable to get volume name"); ++ goto out; ++ } ++ ret = manage_export_config(volname, "off", errstr); ++ } ++ } ++out: ++ if (ret) { ++ gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0, ++ GD_MSG_NFS_GNS_OP_HANDLE_FAIL, ++ "Handling NFS-Ganesha" ++ " op failed."); ++ } ++ return ret; ++} ++ ++int ++glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) ++{ ++ int ret = -1; ++ int value = -1; ++ gf_boolean_t option = _gf_false; ++ char *str = NULL; ++ glusterd_conf_t *priv = NULL; ++ xlator_t *this = NULL; ++ ++ GF_ASSERT(dict); ++ this = THIS; ++ GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); ++ ++ value = dict_get_str_boolean(dict, "value", _gf_false); ++ if (value == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "value not present."); ++ goto out; ++ } ++ /* This dict_get will fail if the user had never set the key before */ ++ /*Ignoring the ret value and proceeding */ ++ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_GET_FAILED, ++ "Global dict not present."); ++ ret = 0; ++ goto out; ++ } ++ /* Validity of the value is already checked */ ++ ret = gf_string2boolean(str, &option); ++ /* Check if the feature is already enabled, fail in that case */ ++ if (value == option) { ++ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", str); ++ ret = -1; ++ goto out; ++ } ++ ++ if (value) { ++ ret = start_ganesha(op_errstr); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL, ++ "Could not start NFS-Ganesha"); ++ } ++ } else { ++ ret = stop_ganesha(op_errstr); ++ if (ret) ++ gf_msg_debug(THIS->name, 0, ++ "Could not stop " ++ "NFS-Ganesha."); ++ } ++ ++out: ++ ++ if (ret) { ++ if (!(*op_errstr)) { ++ *op_errstr = gf_strdup("Error, Validation Failed"); ++ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s", ++ GLUSTERD_STORE_KEY_GANESHA_GLOBAL); ++ } else { ++ gf_msg_debug(this->name, 0, "Error, Cannot Validate option"); ++ } ++ } ++ return ret; ++} ++ ++int ++glusterd_op_set_ganesha(dict_t *dict, char **errstr) ++{ ++ int ret = 0; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ char *key = NULL; ++ char *value = NULL; ++ char *next_version = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ GF_ASSERT(dict); ++ ++ priv = this->private; ++ GF_ASSERT(priv); ++ ++ ret = dict_get_str(dict, "key", &key); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Couldn't get key in global option set"); ++ goto out; ++ } ++ ++ ret = dict_get_str(dict, "value", &value); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, 
GD_MSG_DICT_GET_FAILED, ++ "Couldn't get value in global option set"); ++ goto out; ++ } ++ ++ ret = glusterd_handle_ganesha_op(dict, errstr, key, value); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL, ++ "Initial NFS-Ganesha set up failed"); ++ ret = -1; ++ goto out; ++ } ++ ret = dict_set_dynstr_with_alloc(priv->opts, ++ GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set" ++ " nfs-ganesha in dict."); ++ goto out; ++ } ++ ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version); ++ if (ret) { ++ gf_msg_debug(THIS->name, 0, ++ "Could not fetch " ++ " global op version"); ++ goto out; ++ } ++ ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_store_options(this, priv->opts); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL, ++ "Failed to store options"); ++ goto out; ++ } ++ ++out: ++ gf_msg_debug(this->name, 0, "returning %d", ret); ++ return ret; ++} ++ ++/* Following function parse GANESHA_HA_CONF ++ * The sample file looks like below, ++ * HA_NAME="ganesha-ha-360" ++ * HA_VOL_NAME="ha-state" ++ * HA_CLUSTER_NODES="server1,server2" ++ * VIP_rhs_1="10.x.x.x" ++ * VIP_rhs_2="10.x.x.x." */ ++ ++/* Check if the localhost is listed as one of nfs-ganesha nodes */ ++gf_boolean_t ++check_host_list(void) ++{ ++ glusterd_conf_t *priv = NULL; ++ char *hostname, *hostlist; ++ gf_boolean_t ret = _gf_false; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ priv = THIS->private; ++ GF_ASSERT(priv); ++ ++ hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES"); ++ if (hostlist == NULL) { ++ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED, ++ "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF); ++ return _gf_false; ++ } ++ ++ /* Hostlist is a comma separated list now */ ++ hostname = strtok(hostlist, ","); ++ while (hostname != NULL) { ++ ret = gf_is_local_addr(hostname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND, ++ "ganesha host found " ++ "Hostname is %s", ++ hostname); ++ break; ++ } ++ hostname = strtok(NULL, ","); ++ } ++ ++ GF_FREE(hostlist); ++ return ret; ++} ++ ++int ++manage_export_config(char *volname, char *value, char **op_errstr) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ int ret = -1; ++ ++ GF_ASSERT(volname); ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/create-export-ganesha.sh", ++ CONFDIR, value, volname, NULL); ++ ret = runner_run(&runner); ++ ++ if (ret) ++ gf_asprintf(op_errstr, ++ "Failed to create" ++ " NFS-Ganesha export config file."); ++ ++ return ret; ++} ++ ++/* Exports and unexports a particular volume via NFS-Ganesha */ ++int ++ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ int ret = -1; ++ glusterd_volinfo_t *volinfo = NULL; ++ dict_t *vol_opts = NULL; ++ char *volname = NULL; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ gf_boolean_t option = _gf_false; ++ ++ runinit(&runner); ++ this = THIS; ++ GF_ASSERT(this); ++ priv = this->private; ++ ++ GF_ASSERT(value); ++ GF_ASSERT(dict); ++ GF_ASSERT(priv); ++ ++ ret = dict_get_str(dict, "volname", &volname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Unable to get volume name"); ++ goto out; ++ } ++ ret = gf_string2boolean(value, &option); ++ if (ret == -1) { ++ gf_msg(this->name, 
GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, ++ "invalid value."); ++ goto out; ++ } ++ ++ ret = glusterd_volinfo_find(volname, &volinfo); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, ++ FMTSTR_CHECK_VOL_EXISTS, volname); ++ goto out; ++ } ++ ++ ret = glusterd_check_ganesha_export(volinfo); ++ if (ret && option) { ++ gf_asprintf(op_errstr, ++ "ganesha.enable " ++ "is already 'on'."); ++ ret = -1; ++ goto out; ++ ++ } else if (!option && !ret) { ++ gf_asprintf(op_errstr, ++ "ganesha.enable " ++ "is already 'off'."); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Check if global option is enabled, proceed only then */ ++ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, ++ _gf_false); ++ if (ret == -1) { ++ gf_msg_debug(this->name, 0, ++ "Failed to get " ++ "global option dict."); ++ gf_asprintf(op_errstr, ++ "The option " ++ "nfs-ganesha should be " ++ "enabled before setting ganesha.enable."); ++ goto out; ++ } ++ if (!ret) { ++ gf_asprintf(op_errstr, ++ "The option " ++ "nfs-ganesha should be " ++ "enabled before setting ganesha.enable."); ++ ret = -1; ++ goto out; ++ } ++ ++ /* * ++ * Create the export file from the node where ganesha.enable "on" ++ * is executed ++ * */ ++ if (option) { ++ ret = manage_export_config(volname, "on", op_errstr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, ++ "Failed to create" ++ "export file for NFS-Ganesha\n"); ++ goto out; ++ } ++ } ++ ++ if (check_host_list()) { ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/dbus-send.sh", CONFDIR, ++ value, volname, NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "Dynamic export" ++ " addition/deletion failed." ++ " Please see log file for details"); ++ goto out; ++ } ++ } ++ ++ vol_opts = volinfo->dict; ++ ret = dict_set_dynstr_with_alloc(vol_opts, "features.cache-invalidation", ++ value); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "Cache-invalidation could not" ++ " be set to %s.", ++ value); ++ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) ++ gf_asprintf(op_errstr, "failed to store volinfo for %s", ++ volinfo->volname); ++ ++out: ++ return ret; ++} ++ ++int ++tear_down_cluster(gf_boolean_t run_teardown) ++{ ++ int ret = 0; ++ runner_t runner = { ++ 0, ++ }; ++ struct stat st = { ++ 0, ++ }; ++ DIR *dir = NULL; ++ struct dirent *entry = NULL; ++ struct dirent scratch[2] = { ++ { ++ 0, ++ }, ++ }; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ ++ if (run_teardown) { ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ "teardown", CONFDIR, NULL); ++ ret = runner_run(&runner); ++ /* * ++ * Remove all the entries in CONFDIR expect ganesha.conf and ++ * ganesha-ha.conf ++ */ ++ dir = sys_opendir(CONFDIR); ++ if (!dir) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to open directory %s. 
" ++ "Reason : %s", ++ CONFDIR, strerror(errno)); ++ ret = 0; ++ goto out; ++ } ++ ++ GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); ++ while (entry) { ++ snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name); ++ ret = sys_lstat(path, &st); ++ if (ret == -1) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to stat entry %s :" ++ " %s", ++ path, strerror(errno)); ++ goto out; ++ } ++ ++ if (strcmp(entry->d_name, "ganesha.conf") == 0 || ++ strcmp(entry->d_name, "ganesha-ha.conf") == 0) ++ gf_msg_debug(THIS->name, 0, ++ " %s is not required" ++ " to remove", ++ path); ++ else if (S_ISDIR(st.st_mode)) ++ ret = recursive_rmdir(path); ++ else ++ ret = sys_unlink(path); ++ ++ if (ret) { ++ gf_msg_debug(THIS->name, 0, ++ " Failed to remove %s. " ++ "Reason : %s", ++ path, strerror(errno)); ++ } ++ ++ gf_msg_debug(THIS->name, 0, "%s %s", ++ ret ? "Failed to remove" : "Removed", entry->d_name); ++ GF_SKIP_IRRELEVANT_ENTRIES(entry, dir, scratch); ++ } ++ ++ ret = sys_closedir(dir); ++ if (ret) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to close dir %s. Reason :" ++ " %s", ++ CONFDIR, strerror(errno)); ++ } ++ } ++ ++out: ++ return ret; ++} ++ ++int ++setup_cluster(gf_boolean_t run_setup) ++{ ++ int ret = 0; ++ runner_t runner = { ++ 0, ++ }; ++ ++ if (run_setup) { ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "setup", ++ CONFDIR, NULL); ++ ret = runner_run(&runner); ++ } ++ return ret; ++} ++ ++static int ++teardown(gf_boolean_t run_teardown, char **op_errstr) ++{ ++ runner_t runner = { ++ 0, ++ }; ++ int ret = 1; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *priv = NULL; ++ dict_t *vol_opts = NULL; ++ ++ priv = THIS->private; ++ ++ ret = tear_down_cluster(run_teardown); ++ if (ret == -1) { ++ gf_asprintf(op_errstr, ++ "Cleanup of NFS-Ganesha" ++ " HA config failed."); ++ goto out; ++ } ++ ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", ++ CONFDIR, NULL); ++ ret = runner_run(&runner); ++ if (ret) ++ gf_msg_debug(THIS->name, 0, ++ "Could not clean up" ++ " NFS-Ganesha related config"); ++ ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) ++ { ++ vol_opts = volinfo->dict; ++ /* All the volumes exported via NFS-Ganesha will be ++ unexported, hence setting the appropriate keys */ ++ ret = dict_set_str(vol_opts, "features.cache-invalidation", "off"); ++ if (ret) ++ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, ++ "Could not set features.cache-invalidation " ++ "to off for %s", ++ volinfo->volname); ++ ++ ret = dict_set_str(vol_opts, "ganesha.enable", "off"); ++ if (ret) ++ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED, ++ "Could not set ganesha.enable to off for %s", ++ volinfo->volname); ++ ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL, ++ "failed to store volinfo for %s", volinfo->volname); ++ } ++out: ++ return ret; ++} ++ ++int ++stop_ganesha(char **op_errstr) ++{ ++ int ret = 0; ++ runner_t runner = { ++ 0, ++ }; ++ ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ "--setup-ganesha-conf-files", CONFDIR, "no", NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "removal of symlink ganesha.conf " ++ "in /etc/ganesha failed"); ++ } ++ ++ if (check_host_list()) { ++ ret = manage_service("stop"); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "NFS-Ganesha service could not" ++ 
"be stopped."); ++ } ++ return ret; ++} ++ ++int ++start_ganesha(char **op_errstr) ++{ ++ int ret = -1; ++ dict_t *vol_opts = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *priv = NULL; ++ runner_t runner = { ++ 0, ++ }; ++ ++ priv = THIS->private; ++ GF_ASSERT(priv); ++ ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) ++ { ++ vol_opts = volinfo->dict; ++ /* Gluster-nfs has to be disabled across the trusted pool */ ++ /* before attempting to start nfs-ganesha */ ++ ret = dict_set_str(vol_opts, NFS_DISABLE_MAP_KEY, "on"); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) { ++ *op_errstr = gf_strdup( ++ "Failed to store the " ++ "Volume information"); ++ goto out; ++ } ++ } ++ ++ /* If the nfs svc is not initialized it means that the service is not ++ * running, hence we can skip the process of stopping gluster-nfs ++ * service ++ */ ++ if (priv->nfs_svc.inited) { ++ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL); ++ if (ret) { ++ ret = -1; ++ gf_asprintf(op_errstr, ++ "Gluster-NFS service could" ++ "not be stopped, exiting."); ++ goto out; ++ } ++ } ++ ++ if (check_host_list()) { ++ runinit(&runner); ++ runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "creation of symlink ganesha.conf " ++ "in /etc/ganesha failed"); ++ goto out; ++ } ++ ret = manage_service("start"); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "NFS-Ganesha failed to start." ++ "Please see log file for details"); ++ } ++ ++out: ++ return ret; ++} ++ ++static int ++pre_setup(gf_boolean_t run_setup, char **op_errstr) ++{ ++ int ret = 0; ++ ++ ret = check_host_list(); ++ ++ if (ret) { ++ ret = setup_cluster(run_setup); ++ if (ret == -1) ++ gf_asprintf(op_errstr, ++ "Failed to set up HA " ++ "config for NFS-Ganesha. " ++ "Please check the log file for details"); ++ } ++ ++ return ret; ++} ++ ++int ++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key, ++ char *value) ++{ ++ int32_t ret = -1; ++ gf_boolean_t option = _gf_false; ++ ++ GF_ASSERT(dict); ++ GF_ASSERT(op_errstr); ++ GF_ASSERT(key); ++ GF_ASSERT(value); ++ ++ if (strcmp(key, "ganesha.enable") == 0) { ++ ret = ganesha_manage_export(dict, value, op_errstr); ++ if (ret < 0) ++ goto out; ++ } ++ ++ /* It is possible that the key might not be set */ ++ ret = gf_string2boolean(value, &option); ++ if (ret == -1) { ++ gf_asprintf(op_errstr, "Invalid value in key-value pair."); ++ goto out; ++ } ++ ++ if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) { ++ /* * ++ * The set up/teardown of pcs cluster should be performed only ++ * once. This will done on the node in which the cli command ++ * 'gluster nfs-ganesha <enable/disable>' got executed. 
++int
++start_ganesha(char **op_errstr)
++{
++    int ret = -1;
++    dict_t *vol_opts = NULL;
++    glusterd_volinfo_t *volinfo = NULL;
++    glusterd_conf_t *priv = NULL;
++    runner_t runner = {
++        0,
++    };
++
++    priv = THIS->private;
++    GF_ASSERT(priv);
++
++    cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
++    {
++        vol_opts = volinfo->dict;
++        /* Gluster-nfs has to be disabled across the trusted pool */
++        /* before attempting to start nfs-ganesha */
++        ret = dict_set_str(vol_opts, NFS_DISABLE_MAP_KEY, "on");
++        if (ret)
++            goto out;
++
++        ret = glusterd_store_volinfo(volinfo,
++                                     GLUSTERD_VOLINFO_VER_AC_INCREMENT);
++        if (ret) {
++            *op_errstr = gf_strdup(
++                "Failed to store the "
++                "volume information");
++            goto out;
++        }
++    }
++
++    /* If the nfs svc is not initialized it means that the service is not
++     * running, hence we can skip the process of stopping gluster-nfs
++     * service
++     */
++    if (priv->nfs_svc.inited) {
++        ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
++        if (ret) {
++            ret = -1;
++            gf_asprintf(op_errstr,
++                        "Gluster-NFS service could "
++                        "not be stopped, exiting.");
++            goto out;
++        }
++    }
++
++    if (check_host_list()) {
++        runinit(&runner);
++        runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh",
++                        "--setup-ganesha-conf-files", CONFDIR, "yes", NULL);
++        ret = runner_run(&runner);
++        if (ret) {
++            gf_asprintf(op_errstr,
++                        "creation of symlink ganesha.conf "
++                        "in /etc/ganesha failed");
++            goto out;
++        }
++        ret = manage_service("start");
++        if (ret)
++            gf_asprintf(op_errstr,
++                        "NFS-Ganesha failed to start. "
++                        "Please see log file for details");
++    }
++
++out:
++    return ret;
++}
++
++static int
++pre_setup(gf_boolean_t run_setup, char **op_errstr)
++{
++    int ret = 0;
++
++    ret = check_host_list();
++
++    if (ret) {
++        ret = setup_cluster(run_setup);
++        if (ret == -1)
++            gf_asprintf(op_errstr,
++                        "Failed to set up HA "
++                        "config for NFS-Ganesha. "
++                        "Please check the log file for details");
++    }
++
++    return ret;
++}
++
++int
++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
++                           char *value)
++{
++    int32_t ret = -1;
++    gf_boolean_t option = _gf_false;
++
++    GF_ASSERT(dict);
++    GF_ASSERT(op_errstr);
++    GF_ASSERT(key);
++    GF_ASSERT(value);
++
++    if (strcmp(key, "ganesha.enable") == 0) {
++        ret = ganesha_manage_export(dict, value, op_errstr);
++        if (ret < 0)
++            goto out;
++    }
++
++    /* It is possible that the key might not be set */
++    ret = gf_string2boolean(value, &option);
++    if (ret == -1) {
++        gf_asprintf(op_errstr, "Invalid value in key-value pair.");
++        goto out;
++    }
++
++    if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) {
++        /* *
++         * The set up/teardown of the pcs cluster should be performed
++         * only once. This will be done on the node on which the cli
++         * command 'gluster nfs-ganesha <enable/disable>' was executed.
++         * That node should be part of the ganesha HA cluster.
++         */
++        if (option) {
++            ret = pre_setup(is_origin_glusterd(dict), op_errstr);
++            if (ret < 0)
++                goto out;
++        } else {
++            ret = teardown(is_origin_glusterd(dict), op_errstr);
++            if (ret < 0)
++                goto out;
++        }
++    }
++
++out:
++    return ret;
++}
+diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
+index de44af7..528993c 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
+@@ -1911,6 +1911,83 @@ glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+     return ret;
+ }
+ 
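++/* Handler for the "gluster nfs-ganesha <enable|disable>" CLI request:
++ * decodes the request, unserializes the dictionary and hands the
++ * GD_OP_GANESHA operation over to the op state machine via a synctask.
++ */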
++int
++__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
++{
++    int32_t ret = -1;
++    gf_cli_req cli_req = {{
++        0,
++    }};
++    dict_t *dict = NULL;
++    glusterd_op_t cli_op = GD_OP_GANESHA;
++    char *op_errstr = NULL;
++    char err_str[2048] = {
++        0,
++    };
++    xlator_t *this = NULL;
++
++    this = THIS;
++    GF_ASSERT(this);
++
++    GF_ASSERT(req);
++
++    ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
++    if (ret < 0) {
++        snprintf(err_str, sizeof(err_str),
++                 "Failed to decode "
++                 "request received from cli");
++        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
++               err_str);
++        req->rpc_err = GARBAGE_ARGS;
++        goto out;
++    }
++
++    if (cli_req.dict.dict_len) {
++        /* Unserialize the dictionary */
++        dict = dict_new();
++        if (!dict) {
++            ret = -1;
++            goto out;
++        }
++
++        ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
++                               &dict);
++        if (ret < 0) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
++                   "failed to "
++                   "unserialize req-buffer to dictionary");
++            snprintf(err_str, sizeof(err_str),
++                     "Unable to decode "
++                     "the command");
++            goto out;
++        } else {
++            dict->extra_stdfree = cli_req.dict.dict_val;
++        }
++    }
++
++    gf_msg_trace(this->name, 0, "Received global option request");
++
++    ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict);
++out:
++    if (ret) {
++        if (err_str[0] == '\0')
++            snprintf(err_str, sizeof(err_str), "Operation failed");
++        ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
++    }
++    if (op_errstr)
++        GF_FREE(op_errstr);
++    if (dict)
++        dict_unref(dict);
++
++    return ret;
++}
++
++int
++glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
++{
++    return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd);
++}
++
+ static int
+ __glusterd_handle_reset_volume(rpcsvc_request_t *req)
+ {
+@@ -6644,6 +6721,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+     [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME",
+                                     GLUSTER_CLI_BARRIER_VOLUME,
+                                     glusterd_handle_barrier, NULL, 0, DRC_NA},
++    [GLUSTER_CLI_GANESHA] = {"GANESHA", GLUSTER_CLI_GANESHA,
++                             glusterd_handle_ganesha_cmd, NULL, 0, DRC_NA},
+     [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", GLUSTER_CLI_GET_VOL_OPT,
+                                  glusterd_handle_get_vol_opt, NULL, 0, DRC_NA},
+     [GLUSTER_CLI_BITROT] = {"BITROT", GLUSTER_CLI_BITROT,
+diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
+index 1a4bd54..9558480 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
+@@ -297,7 +297,7 @@ GLFS_MSGID(
+     GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL,
+     GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+     GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
+-    GD_MSG_MANAGER_FUNCTION_FAILED,
++    GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED,
+     GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL);
+ 
+ #endif /* !_GLUSTERD_MESSAGES_H_ */
+diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+index 12d857a..a630c48 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+@@ -1176,6 +1176,13 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
+     if (ret)
+         goto out;
+ 
++    if ((strcmp(key, "ganesha.enable") == 0) &&
++        (strcmp(value, "off") == 0)) {
++        ret = ganesha_manage_export(dict, "off", op_errstr);
++        if (ret)
++            goto out;
++    }
++
+     ret = glusterd_check_quota_cmd(key, value, errstr, sizeof(errstr));
+     if (ret)
+         goto out;
+@@ -1677,6 +1684,20 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
+         goto out;
+     }
+ 
++    /* *
++     * If the key ganesha.enable is set, then the volume should be
++     * unexported from the ganesha server. Since it is a volume-level
++     * option, perform this only when the volume name is not "all"
++     * (in other words, when volinfo != NULL).
++     */
++    if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) {
++        if (glusterd_check_ganesha_export(volinfo)) {
++            ret = ganesha_manage_export(dict, "off", op_errstr);
++            if (ret)
++                gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
++                       "Could not reset ganesha.enable key");
++        }
++    }
++
+     if (strcmp(key, "all")) {
+         exists = glusterd_check_option_exists(key, &key_fixed);
+         if (exists == -1) {
+@@ -2393,6 +2414,15 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr)
+         }
+     }
+ 
++    if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) {
++        if (glusterd_check_ganesha_export(volinfo)) {
++            ret = manage_export_config(volname, "off", op_rspstr);
++            if (ret)
++                gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
++                       "Could not reset ganesha.enable key");
++        }
++    }
++
+ out:
+     GF_FREE(key_fixed);
+     if (quorum_action)
+@@ -2964,6 +2994,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr)
+         }
+     }
+ 
++    ret = glusterd_check_ganesha_cmd(key, value, errstr, dict);
++    if (ret == -1)
++        goto out;
++
+     if (!is_key_glusterd_hooks_friendly(key)) {
+         ret = glusterd_check_option_exists(key, &key_fixed);
+         GF_ASSERT(ret);
+@@ -4494,7 +4528,8 @@ glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx)
+ 
+         case GD_OP_SYNC_VOLUME:
+         case GD_OP_COPY_FILE:
+-        case GD_OP_SYS_EXEC: {
++        case GD_OP_SYS_EXEC:
++        case GD_OP_GANESHA: {
+             dict_copy(dict, req_dict);
+         } break;
+ 
+@@ -5944,6 +5979,10 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
+             ret = glusterd_op_stage_set_volume(dict, op_errstr);
+             break;
+ 
++        case GD_OP_GANESHA:
++            ret = glusterd_op_stage_set_ganesha(dict, op_errstr);
++            break;
++
+         case GD_OP_RESET_VOLUME:
+             ret = glusterd_op_stage_reset_volume(dict, op_errstr);
+             break;
+@@ -6074,7 +6113,9 @@ glusterd_op_commit_perform(glusterd_op_t op, dict_t *dict, char **op_errstr,
+         case GD_OP_SET_VOLUME:
+             ret = glusterd_op_set_volume(dict, op_errstr);
+             break;
+-
++        case GD_OP_GANESHA:
++            ret = glusterd_op_set_ganesha(dict, op_errstr);
++            break;
+         case GD_OP_RESET_VOLUME:
+             ret = glusterd_op_reset_volume(dict, op_errstr);
+             break;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+index 2958443..041946d 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+@@ -3788,6 +3788,148 @@ out:
+     return ret;
+ }
+ 
++/* *
++ * Here there are two possibilities: either the destination is a snapshot
++ * or a clone. In the case of a snapshot, the nfs_ganesha export file will
++ * be copied to snapdir. If it is a clone, then a new export file will be
++ * created for the clone in the GANESHA_EXPORT_DIRECTORY, replacing
++ * occurrences of volname with clonename.
++ */
++int
++glusterd_copy_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
++                               glusterd_volinfo_t *dest_vol)
++{
++    int32_t ret = -1;
++    char snap_dir[PATH_MAX] = {
++        0,
++    };
++    char src_path[PATH_MAX] = {
++        0,
++    };
++    char dest_path[PATH_MAX] = {
++        0,
++    };
++    char buffer[BUFSIZ] = {
++        0,
++    };
++    char *find_ptr = NULL;
++    char *buff_ptr = NULL;
++    char *tmp_ptr = NULL;
++    xlator_t *this = NULL;
++    glusterd_conf_t *priv = NULL;
++    struct stat stbuf = {
++        0,
++    };
++    FILE *src = NULL;
++    FILE *dest = NULL;
++
++    this = THIS;
++    GF_VALIDATE_OR_GOTO("snapshot", this, out);
++    priv = this->private;
++    GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++    GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
++    GF_VALIDATE_OR_GOTO(this->name, dest_vol, out);
++
++    if (glusterd_check_ganesha_export(src_vol) == _gf_false) {
++        gf_msg_debug(this->name, 0,
++                     "%s is not exported via "
++                     "NFS-Ganesha. Skipping copy of export conf.",
++                     src_vol->volname);
++        ret = 0;
++        goto out;
++    }
++
++    if (src_vol->is_snap_volume) {
++        GLUSTERD_GET_SNAP_DIR(snap_dir, src_vol->snapshot, priv);
++        ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", snap_dir,
++                       src_vol->snapshot->snapname);
++    } else {
++        ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf",
++                       GANESHA_EXPORT_DIRECTORY, src_vol->volname);
++    }
++    if (ret < 0 || ret >= PATH_MAX)
++        goto out;
++
++    ret = sys_lstat(src_path, &stbuf);
++    if (ret) {
++        /* *
++         * This code path is hit only when the src_vol is being
++         * exported via NFS-Ganesha. So if the conf file is not
++         * available, we fail the snapshot operation.
++         */
++        gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
++               "Stat on %s failed with %s", src_path, strerror(errno));
++        goto out;
++    }
++
++    if (dest_vol->is_snap_volume) {
++        memset(snap_dir, 0, PATH_MAX);
++        GLUSTERD_GET_SNAP_DIR(snap_dir, dest_vol->snapshot, priv);
++        ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++                       snap_dir, dest_vol->snapshot->snapname);
++        if (ret < 0)
++            goto out;
++
++        ret = glusterd_copy_file(src_path, dest_path);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++                   "Failed to copy %s to %s", src_path, dest_path);
++            goto out;
++        }
++
++    } else {
++        ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++                       GANESHA_EXPORT_DIRECTORY, dest_vol->volname);
++        if (ret < 0)
++            goto out;
++
++        src = fopen(src_path, "r");
++        dest = fopen(dest_path, "w");
++
++        if (!src || !dest) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
++                   "Failed to open %s", dest ? src_path : dest_path);
++            ret = -1;
++            goto out;
++        }
++
++        /* *
++         * If the source volume is a snapshot, the export conf file
++         * contains the original volname.
++         */
++        if (src_vol->is_snap_volume)
++            find_ptr = gf_strdup(src_vol->parent_volname);
++        else
++            find_ptr = gf_strdup(src_vol->volname);
++
++        if (!find_ptr)
++            goto out;
++
++        /* Replacing volname with clonename */
++        while (fgets(buffer, BUFSIZ, src)) {
++            buff_ptr = buffer;
++            while ((tmp_ptr = strstr(buff_ptr, find_ptr))) {
++                while (buff_ptr < tmp_ptr)
++                    fputc((int)*buff_ptr++, dest);
++                fputs(dest_vol->volname, dest);
++                buff_ptr += strlen(find_ptr);
++            }
++            fputs(buff_ptr, dest);
++            memset(buffer, 0, BUFSIZ);
++        }
++    }
++out:
++    if (src)
++        fclose(src);
++    if (dest)
++        fclose(dest);
++    if (find_ptr)
++        GF_FREE(find_ptr);
++
++    return ret;
++}
++
+ int32_t
+ glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol)
+ {
+@@ -3876,6 +4018,60 @@ out:
+     return ret;
+ }
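++/* *
++ * On snapshot restore, copy the export file preserved in the snap
++ * directory (export.<snapname>.conf) back to GANESHA_EXPORT_DIRECTORY
++ * as export.<volname>.conf. A missing file simply means the volume was
++ * not exported via NFS-Ganesha when the snapshot was taken.
++ */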
++int
++glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
++                                  glusterd_snap_t *snap)
++{
++    int32_t ret = -1;
++    char snap_dir[PATH_MAX] = "";
++    char src_path[PATH_MAX] = "";
++    char dest_path[PATH_MAX] = "";
++    xlator_t *this = NULL;
++    glusterd_conf_t *priv = NULL;
++    struct stat stbuf = {
++        0,
++    };
++
++    this = THIS;
++    GF_VALIDATE_OR_GOTO("snapshot", this, out);
++    priv = this->private;
++    GF_VALIDATE_OR_GOTO(this->name, priv, out);
++
++    GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
++    GF_VALIDATE_OR_GOTO(this->name, snap, out);
++
++    GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv);
++
++    ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir,
++                   snap->snapname);
++    if (ret < 0)
++        goto out;
++
++    ret = sys_lstat(src_path, &stbuf);
++    if (ret) {
++        if (errno == ENOENT) {
++            ret = 0;
++            gf_msg_debug(this->name, 0, "%s not found", src_path);
++        } else
++            gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
++                   "Stat on %s failed with %s", src_path, strerror(errno));
++        goto out;
++    }
++
++    ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
++                   GANESHA_EXPORT_DIRECTORY, src_vol->volname);
++    if (ret < 0)
++        goto out;
++
++    ret = glusterd_copy_file(src_path, dest_path);
++    if (ret)
++        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
++               "Failed to copy %s to %s", src_path, dest_path);
++
++out:
++    return ret;
++}
++
+ /* Snapd functions */
+ int
+ glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo)
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
+index e60be6e..41d0001 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.h
++++ b/xlators/mgmt/glusterd/src/glusterd-store.h
+@@ -118,6 +118,8 @@ typedef enum glusterd_store_ver_ac_ {
+ #define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped"
+ #define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time"
+ 
++#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
++
+ int32_t
+ glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
+                        glusterd_volinfo_ver_ac_t ac);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+index 86ef470..a0417ca 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+@@ -1823,6 +1823,18 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr)
+         goto out;
+     }
+ 
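++    /* If the volume is exported via NFS-Ganesha, unexport it as part of
++     * the stop operation. A failed unexport is logged as a warning but
++     * does not abort the volume stop.
++     */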
++    ret = glusterd_check_ganesha_export(volinfo);
++    if (ret) {
++        ret = ganesha_manage_export(dict, "off", op_errstr);
++        if (ret) {
++            gf_msg(THIS->name, GF_LOG_WARNING, 0,
++                   GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL,
++                   "Could not "
++                   "unexport volume via NFS-Ganesha");
++            ret = 0;
++        }
++    }
++
+     if (glusterd_is_defrag_on(volinfo)) {
+         snprintf(msg, sizeof(msg),
+                  "rebalance session is "
+@@ -2674,6 +2686,8 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr)
+     xlator_t *this = NULL;
+     glusterd_conf_t *conf = NULL;
+     glusterd_svc_t *svc = NULL;
++    char *str = NULL;
++    gf_boolean_t option = _gf_false;
+ 
+     this = THIS;
+     GF_ASSERT(this);
+@@ -2731,6 +2745,29 @@ glusterd_op_start_volume(dict_t *dict, char **op_errstr)
+         }
+     }
+ 
++    ret = dict_get_str(conf->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
++    if (ret != 0) {
++        gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
++               "Global dict not present.");
++        ret = 0;
++
++    } else {
++        ret = gf_string2boolean(str, &option);
++        /* Check if the feature is enabled and set nfs-disable to true */
++        if (option) {
++            gf_msg_debug(this->name, 0, "NFS-Ganesha is enabled");
++            /* Gluster-nfs should not start when NFS-Ganesha is enabled */
++            ret = dict_set_str(volinfo->dict, NFS_DISABLE_MAP_KEY, "on");
++            if (ret) {
++                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
++                       "Failed to set nfs.disable for "
++                       "volume %s",
++                       volname);
++                goto out;
++            }
++        }
++    }
++
+     ret = glusterd_start_volume(volinfo, flags, _gf_true);
+     if (ret)
+         goto out;
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index d1244e4..13f423a 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2597,6 +2597,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+         .voltype = "features/upcall",
+         .op_version = GD_OP_VERSION_3_7_0,
+     },
++    {
++        .key = "ganesha.enable",
++        .voltype = "features/ganesha",
++        .value = "off",
++        .option = "ganesha.enable",
++        .op_version = GD_OP_VERSION_3_7_0,
++    },
+     /* Lease translator options */
+     {
+         .key = "features.leases",
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
+index 67867f8..5135181 100644
+--- a/xlators/mgmt/glusterd/src/glusterd.h
++++ b/xlators/mgmt/glusterd/src/glusterd.h
+@@ -61,6 +61,9 @@
+ #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging"
+ #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level"
+ 
++#define GANESHA_HA_CONF CONFDIR "/ganesha-ha.conf"
++#define GANESHA_EXPORT_DIRECTORY CONFDIR "/exports"
++
+ #define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256
+ #define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90
+ #define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100
+@@ -1356,6 +1359,25 @@ glusterd_op_stop_volume(dict_t *dict);
+ int
+ glusterd_op_delete_volume(dict_t *dict);
+ int
++glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
++                           char *value);
++int
++glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict);
++int
++glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr);
++int
++glusterd_op_set_ganesha(dict_t *dict, char **errstr);
++int
++ganesha_manage_export(dict_t *dict, char *value, char **op_errstr);
++gf_boolean_t
++glusterd_is_ganesha_cluster();
++gf_boolean_t
++glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo);
++int
++stop_ganesha(char **op_errstr);
++int
++tear_down_cluster(gf_boolean_t run_teardown);
++int
+ manage_export_config(char *volname, char *value, char **op_errstr);
+ 
+ int
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch 
b/SOURCES/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch new file mode 100644 index 0000000..b2d6052 --- /dev/null +++ b/SOURCES/0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch @@ -0,0 +1,1897 @@ +From 267135e74d94d8a1e56a263b37ca4689020b9e53 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 16 Oct 2017 16:58:28 +0530 +Subject: [PATCH 055/124] Revert "storhaug HA: first step, remove resource + agents and setup script" + +This reverts commit c822e354e16646adf18bbc5123798663faa543b2. + +Label: DOWNSTREAM ONLY + +Change-Id: Idd50fe1a5be5a3258d560518d810f9ec4c57621a +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167104 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + configure.ac | 1 + + extras/ganesha/Makefile.am | 2 +- + extras/ganesha/ocf/Makefile.am | 11 + + extras/ganesha/ocf/ganesha_grace | 221 +++++++ + extras/ganesha/ocf/ganesha_mon | 234 +++++++ + extras/ganesha/ocf/ganesha_nfsd | 167 +++++ + extras/ganesha/scripts/Makefile.am | 6 +- + extras/ganesha/scripts/ganesha-ha.sh | 1125 ++++++++++++++++++++++++++++++++++ + glusterfs.spec.in | 8 +- + 9 files changed, 1771 insertions(+), 4 deletions(-) + create mode 100644 extras/ganesha/ocf/Makefile.am + create mode 100644 extras/ganesha/ocf/ganesha_grace + create mode 100644 extras/ganesha/ocf/ganesha_mon + create mode 100644 extras/ganesha/ocf/ganesha_nfsd + create mode 100644 extras/ganesha/scripts/ganesha-ha.sh + +diff --git a/configure.ac b/configure.ac +index 125ae29..baa811a 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -199,6 +199,7 @@ AC_CONFIG_FILES([Makefile + extras/ganesha/Makefile + extras/ganesha/config/Makefile + extras/ganesha/scripts/Makefile ++ extras/ganesha/ocf/Makefile + extras/systemd/Makefile + extras/systemd/glusterd.service + extras/systemd/glustereventsd.service +diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am +index 542de68..9eaa401 100644 +--- a/extras/ganesha/Makefile.am ++++ b/extras/ganesha/Makefile.am +@@ -1,2 +1,2 @@ +-SUBDIRS = scripts config ++SUBDIRS = scripts config ocf + CLEANFILES = +diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am +new file mode 100644 +index 0000000..990a609 +--- /dev/null ++++ b/extras/ganesha/ocf/Makefile.am +@@ -0,0 +1,11 @@ ++EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd ++ ++# The root of the OCF resource agent hierarchy ++# Per the OCF standard, it's always "lib", ++# not "lib64" (even on 64-bit platforms). ++ocfdir = $(prefix)/lib/ocf ++ ++# The provider directory ++radir = $(ocfdir)/resource.d/heartbeat ++ ++ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd +diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace +new file mode 100644 +index 0000000..825f716 +--- /dev/null ++++ b/extras/ganesha/ocf/ganesha_grace +@@ -0,0 +1,221 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 Anand Subramanian anands@redhat.com ++# Copyright (c) 2015 Red Hat Inc. ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. 
++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++if [ -n "$OCF_DEBUG_LIBRARY" ]; then ++ . $OCF_DEBUG_LIBRARY ++else ++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++fi ++ ++OCF_RESKEY_grace_active_default="grace-active" ++: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} ++ ++ganesha_meta_data() { ++ cat <<END ++<?xml version="1.0"?> ++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> ++<resource-agent name="ganesha_grace"> ++<version>1.0</version> ++ ++<longdesc lang="en"> ++This Linux-specific resource agent acts as a dummy ++resource agent for nfs-ganesha. ++</longdesc> ++ ++<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> ++ ++<parameters> ++<parameter name="grace_active"> ++<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> ++<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> ++<content type="string" default="grace-active" /> ++</parameter> ++</parameters> ++ ++<actions> ++<action name="start" timeout="40s" /> ++<action name="stop" timeout="40s" /> ++<action name="status" timeout="20s" interval="60s" /> ++<action name="monitor" depth="0" timeout="10s" interval="5s" /> ++<action name="notify" timeout="10s" /> ++<action name="meta-data" timeout="20s" /> ++</actions> ++</resource-agent> ++END ++ ++return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_usage() { ++ echo "ganesha.nfsd USAGE" ++} ++ ++# Make sure meta-data and usage always succeed ++case $__OCF_ACTION in ++ meta-data) ganesha_meta_data ++ exit ${OCF_SUCCESS} ++ ;; ++ usage|help) ganesha_usage ++ exit ${OCF_SUCCESS} ++ ;; ++ *) ++ ;; ++esac ++ ++ganesha_grace_start() ++{ ++ local rc=${OCF_ERR_GENERIC} ++ local host=$(hostname -s) ++ ++ ocf_log debug "ganesha_grace_start()" ++ # give ganesha_mon RA a chance to set the crm_attr first ++ # I mislike the sleep, but it's not clear that looping ++ # with a small sleep is necessarily better ++ # start has a 40sec timeout, so a 5sec sleep here is okay ++ sleep 5 ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) ++ if [ $? -ne 0 ]; then ++ ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" ++ fi ++ fi ++ ++ # Three possibilities: ++ # 1. There is no attribute at all and attr_updater returns ++ # a zero length string. This happens when ++ # ganesha_mon::monitor hasn't run at least once to set ++ # the attribute. 
The assumption here is that the system ++ # is coming up. We pretend, for now, that the node is ++ # healthy, to allow the system to continue coming up. ++ # It will cure itself in a few seconds ++ # 2. There is an attribute, and it has the value "1"; this ++ # node is healthy. ++ # 3. There is an attribute, but it has no value or the value ++ # "0"; this node is not healthy. ++ ++ # case 1 ++ if [[ -z "${attr}" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ # case 2 ++ if [[ "${attr}" = *"value=1" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ # case 3 ++ return ${OCF_NOT_RUNNING} ++} ++ ++ganesha_grace_stop() ++{ ++ ++ ocf_log debug "ganesha_grace_stop()" ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_notify() ++{ ++ # since this is a clone RA we should only ever see pre-start ++ # or post-stop ++ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" ++ case "${mode}" in ++ pre-start | post-stop) ++ dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} ++ if [ $? -ne 0 ]; then ++ ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed" ++ fi ++ ;; ++ esac ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_monitor() ++{ ++ local host=$(hostname -s) ++ ++ ocf_log debug "monitor" ++ ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) ++ if [ $? -ne 0 ]; then ++ ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" ++ fi ++ fi ++ ++ # if there is no attribute (yet), maybe it's because ++ # this RA started before ganesha_mon (nfs-mon) has had ++ # chance to create it. In which case we'll pretend ++ # everything is okay this time around ++ if [[ -z "${attr}" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ if [[ "${attr}" = *"value=1" ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ ++ return ${OCF_NOT_RUNNING} ++} ++ ++ganesha_grace_validate() ++{ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_grace_validate ++ ++# Translate each action into the appropriate function call ++case $__OCF_ACTION in ++start) ganesha_grace_start ++ ;; ++stop) ganesha_grace_stop ++ ;; ++status|monitor) ganesha_grace_monitor ++ ;; ++notify) ganesha_grace_notify ++ ;; ++*) ganesha_grace_usage ++ exit ${OCF_ERR_UNIMPLEMENTED} ++ ;; ++esac ++ ++rc=$? ++ ++# The resource agent may optionally log a debug message ++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" ++exit $rc +diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon +new file mode 100644 +index 0000000..2b4a9d6 +--- /dev/null ++++ b/extras/ganesha/ocf/ganesha_mon +@@ -0,0 +1,234 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 Anand Subramanian anands@redhat.com ++# Copyright (c) 2015 Red Hat Inc. ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++if [ -n "${OCF_DEBUG_LIBRARY}" ]; then ++ . ${OCF_DEBUG_LIBRARY} ++else ++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++fi ++ ++# Defaults ++OCF_RESKEY_ganesha_active_default="ganesha-active" ++OCF_RESKEY_grace_active_default="grace-active" ++OCF_RESKEY_grace_delay_default="5" ++ ++: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}} ++: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} ++: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}} ++ ++ganesha_meta_data() { ++ cat <<END ++<?xml version="1.0"?> ++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> ++<resource-agent name="ganesha_mon"> ++<version>1.0</version> ++ ++<longdesc lang="en"> ++This Linux-specific resource agent acts as a dummy ++resource agent for nfs-ganesha. ++</longdesc> ++ ++<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> ++ ++<parameters> ++<parameter name="ganesha_active"> ++<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc> ++<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc> ++<content type="string" default="ganesha-active" /> ++</parameter> ++<parameter name="grace_active"> ++<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> ++<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> ++<content type="string" default="grace-active" /> ++</parameter> ++<parameter name="grace_delay"> ++<longdesc lang="en"> ++NFS-Ganesha grace delay. ++When changing this, adjust the ganesha_grace RA's monitor interval to match. 
++</longdesc> ++<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc> ++<content type="string" default="5" /> ++</parameter> ++</parameters> ++ ++<actions> ++<action name="start" timeout="40s" /> ++<action name="stop" timeout="40s" /> ++<action name="status" timeout="20s" interval="60s" /> ++<action name="monitor" depth="0" timeout="10s" interval="10s" /> ++<action name="meta-data" timeout="20s" /> ++</actions> ++</resource-agent> ++END ++ ++return ${OCF_SUCCESS} ++} ++ ++ganesha_mon_usage() { ++ echo "ganesha.nfsd USAGE" ++} ++ ++# Make sure meta-data and usage always succeed ++case ${__OCF_ACTION} in ++ meta-data) ganesha_meta_data ++ exit ${OCF_SUCCESS} ++ ;; ++ usage|help) ganesha_usage ++ exit ${OCF_SUCCESS} ++ ;; ++ *) ++ ;; ++esac ++ ++ganesha_mon_start() ++{ ++ ocf_log debug "ganesha_mon_start" ++ ganesha_mon_monitor ++ return $OCF_SUCCESS ++} ++ ++ganesha_mon_stop() ++{ ++ ocf_log debug "ganesha_mon_stop" ++ return $OCF_SUCCESS ++} ++ ++ganesha_mon_monitor() ++{ ++ local host=$(hostname -s) ++ local pid_file="/var/run/ganesha.pid" ++ local rhel6_pid_file="/var/run/ganesha.nfsd.pid" ++ local proc_pid="/proc/" ++ ++ # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid ++ # RHEL7 systemd does not. Would be nice if all distros used the ++ # same pid file. ++ if [ -e ${rhel6_pid_file} ]; then ++ pid_file=${rhel6_pid_file} ++ fi ++ if [ -e ${pid_file} ]; then ++ proc_pid="${proc_pid}$(cat ${pid_file})" ++ fi ++ ++ if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then ++ ++ attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" ++ fi ++ ++ # ganesha_grace (nfs-grace) RA follows grace-active attr ++ # w/ constraint location ++ attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" ++ fi ++ ++ # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) ++ # track grace-active crm_attr (attr != crm_attr) ++ # we can't just use the attr as there's no way to query ++ # its value in RHEL6 pacemaker ++ ++ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null ++ if [ $? -ne 0 ]; then ++ ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" ++ fi ++ fi ++ ++ return ${OCF_SUCCESS} ++ fi ++ ++ # VIP fail-over is triggered by clearing the ++ # ganesha-active node attribute on this node. ++ # ++ # Meanwhile the ganesha_grace notify() runs when its ++ # nfs-grace resource is disabled on a node; which ++ # is triggered by clearing the grace-active attribute ++ # on this node. ++ # ++ # We need to allow time for it to run and put ++ # the remaining ganesha.nfsds into grace before ++ # initiating the VIP fail-over. ++ ++ attrd_updater -D -n ${OCF_RESKEY_grace_active} ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" ++ fi ++ ++ host=$(hostname -s) ++ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null ++ if [ $? -ne 0 ]; then ++ host=$(hostname) ++ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null ++ if [ $? 
-ne 0 ]; then ++ ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" ++ fi ++ fi ++ ++ sleep ${OCF_RESKEY_grace_delay} ++ ++ attrd_updater -D -n ${OCF_RESKEY_ganesha_active} ++ if [ $? -ne 0 ]; then ++ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" ++ fi ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_mon_validate() ++{ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_mon_validate ++ ++# Translate each action into the appropriate function call ++case ${__OCF_ACTION} in ++start) ganesha_mon_start ++ ;; ++stop) ganesha_mon_stop ++ ;; ++status|monitor) ganesha_mon_monitor ++ ;; ++*) ganesha_mon_usage ++ exit ${OCF_ERR_UNIMPLEMENTED} ++ ;; ++esac ++ ++rc=$? ++ ++# The resource agent may optionally log a debug message ++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" ++exit $rc +diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd +new file mode 100644 +index 0000000..93fc8be +--- /dev/null ++++ b/extras/ganesha/ocf/ganesha_nfsd +@@ -0,0 +1,167 @@ ++#!/bin/bash ++# ++# Copyright (c) 2014 Anand Subramanian anands@redhat.com ++# Copyright (c) 2015 Red Hat Inc. ++# All Rights Reserved. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of version 2 of the GNU General Public License as ++# published by the Free Software Foundation. ++# ++# This program is distributed in the hope that it would be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++# ++# Further, this software is distributed without any warranty that it is ++# free of the rightful claim of any third person regarding infringement ++# or the like. Any license provided herein, whether implied or ++# otherwise, applies only to this software file. Patent licenses, if ++# any, provided herein do not apply to combinations of this program with ++# other software, or any other product whatsoever. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write the Free Software Foundation, ++# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. ++# ++# ++ ++# Initialization: ++: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++ ++if [ -n "${OCF_DEBUG_LIBRARY}" ]; then ++ . ${OCF_DEBUG_LIBRARY} ++else ++ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} ++ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs ++fi ++ ++OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" ++: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} ++ ++ganesha_meta_data() { ++ cat <<END ++<?xml version="1.0"?> ++<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> ++<resource-agent name="ganesha_nfsd"> ++<version>1.0</version> ++ ++<longdesc lang="en"> ++This Linux-specific resource agent acts as a dummy ++resource agent for nfs-ganesha. 
++</longdesc> ++ ++<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> ++ ++<parameters> ++<parameter name="ha_vol_mnt"> ++<longdesc lang="en">HA State Volume Mount Point</longdesc> ++<shortdesc lang="en">HA_State Volume Mount Point</shortdesc> ++<content type="string" default="" /> ++</parameter> ++</parameters> ++ ++<actions> ++<action name="start" timeout="5s" /> ++<action name="stop" timeout="5s" /> ++<action name="status" depth="0" timeout="5s" interval="0" /> ++<action name="monitor" depth="0" timeout="5s" interval="0" /> ++<action name="meta-data" timeout="20s" /> ++</actions> ++</resource-agent> ++END ++ ++return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_usage() { ++ echo "ganesha.nfsd USAGE" ++} ++ ++# Make sure meta-data and usage always succeed ++case $__OCF_ACTION in ++ meta-data) ganesha_meta_data ++ exit ${OCF_SUCCESS} ++ ;; ++ usage|help) ganesha_usage ++ exit ${OCF_SUCCESS} ++ ;; ++ *) ++ ;; ++esac ++ ++ganesha_nfsd_start() ++{ ++ local long_host=$(hostname) ++ ++ if [[ -d /var/lib/nfs ]]; then ++ mv /var/lib/nfs /var/lib/nfs.backup ++ if [ $? -ne 0 ]; then ++ ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed" ++ fi ++ ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs ++ if [ $? -ne 0 ]; then ++ ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed" ++ fi ++ fi ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_stop() ++{ ++ ++ if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then ++ rm -f /var/lib/nfs ++ if [ $? -ne 0 ]; then ++ ocf_log notice "rm -f /var/lib/nfs failed" ++ fi ++ mv /var/lib/nfs.backup /var/lib/nfs ++ if [ $? -ne 0 ]; then ++ ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed" ++ fi ++ fi ++ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_monitor() ++{ ++ # pacemaker checks to see if RA is already running before starting it. ++ # if we return success, then it's presumed it's already running and ++ # doesn't need to be started, i.e. invoke the start action. ++ # return something other than success to make pacemaker invoke the ++ # start action ++ if [[ -L /var/lib/nfs ]]; then ++ return ${OCF_SUCCESS} ++ fi ++ return ${OCF_NOT_RUNNING} ++} ++ ++ganesha_nfsd_validate() ++{ ++ return ${OCF_SUCCESS} ++} ++ ++ganesha_nfsd_validate ++ ++# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" ++ ++# Translate each action into the appropriate function call ++case $__OCF_ACTION in ++start) ganesha_nfsd_start ++ ;; ++stop) ganesha_nfsd_stop ++ ;; ++status|monitor) ganesha_nfsd_monitor ++ ;; ++*) ganesha_nfsd_usage ++ exit ${OCF_ERR_UNIMPLEMENTED} ++ ;; ++esac ++ ++rc=$? 
++
++# The resource agent may optionally log a debug message
++ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
++exit $rc
+diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
+index 00a2c45..7e345fd 100644
+--- a/extras/ganesha/scripts/Makefile.am
++++ b/extras/ganesha/scripts/Makefile.am
+@@ -1,4 +1,6 @@
+-EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh
++EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \
++        ganesha-ha.sh
+ 
+ scriptsdir = $(libexecdir)/ganesha
+-scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py
++scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \
++        ganesha-ha.sh
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+new file mode 100644
+index 0000000..6b011be
+--- /dev/null
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -0,0 +1,1125 @@
++#!/bin/bash
++
++# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
++#
++# Pacemaker+Corosync High Availability for NFS-Ganesha
++#
++# setup, teardown, add, delete, refresh-config, and status
++#
++# Each participating node in the cluster is assigned a virtual IP (VIP)
++# which fails over to another node when its associated ganesha.nfsd dies
++# for any reason. After the VIP is moved to another node all the
++# ganesha.nfsds are sent a signal using DBUS to put them into NFS GRACE.
++#
++# There are five resource agent types used: ganesha_mon, ganesha_grace,
++# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the
++# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put
++# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start
++# and stop the ganesha.nfsd during setup and teardown. IPaddr manages
++# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to
++# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
++# the new host.
++
++HA_NUM_SERVERS=0
++HA_SERVERS=""
++HA_VOL_NAME="gluster_shared_storage"
++HA_VOL_MNT="/var/run/gluster/shared_storage"
++HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
++SERVICE_MAN="DISTRO_NOT_FOUND"
++
++RHEL6_PCS_CNAME_OPTION="--name"
++SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
++
++# UNBLOCK RA uses shared_storage, which may become unavailable
++# during a reboot of any of the nodes. Hence increase timeout value.
++PORTBLOCK_UNBLOCK_TIMEOUT="60s"
++
++# Try loading the config from any of the distro
++# specific configuration locations
++if [ -f /etc/sysconfig/ganesha ]
++        then
++        . /etc/sysconfig/ganesha
++fi
++if [ -f /etc/conf.d/ganesha ]
++        then
++        . /etc/conf.d/ganesha
++fi
++if [ -f /etc/default/ganesha ]
++        then
++        . 
/etc/default/ganesha ++fi ++ ++GANESHA_CONF= ++ ++function find_rhel7_conf ++{ ++ while [[ $# > 0 ]] ++ do ++ key="$1" ++ case $key in ++ -f) ++ CONFFILE="$2" ++ break; ++ ;; ++ *) ++ ;; ++ esac ++ shift ++ done ++} ++ ++if [ -z $CONFFILE ] ++ then ++ find_rhel7_conf $OPTIONS ++ ++fi ++ ++GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} ++ ++usage() { ++ ++ echo "Usage : add|delete|refresh-config|status" ++ echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ ++<NODE-HOSTNAME> <NODE-VIP>" ++ echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ ++<NODE-HOSTNAME>" ++ echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ ++<volume>" ++ echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" ++} ++ ++determine_service_manager () { ++ ++ if [ -e "/usr/bin/systemctl" ]; ++ then ++ SERVICE_MAN="/usr/bin/systemctl" ++ elif [ -e "/sbin/invoke-rc.d" ]; ++ then ++ SERVICE_MAN="/sbin/invoke-rc.d" ++ elif [ -e "/sbin/service" ]; ++ then ++ SERVICE_MAN="/sbin/service" ++ fi ++ if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] ++ then ++ echo "Service manager not recognized, exiting" ++ exit 1 ++ fi ++} ++ ++manage_service () ++{ ++ local action=${1} ++ local new_node=${2} ++ local option= ++ ++ if [ "$action" == "start" ]; then ++ option="yes" ++ else ++ option="no" ++ fi ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option" ++ ++ if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ] ++ then ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha" ++ else ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" ++ fi ++} ++ ++ ++check_cluster_exists() ++{ ++ local name=${1} ++ local cluster_name="" ++ ++ if [ -e /var/run/corosync.pid ]; then ++ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) ++ if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then ++ logger "$name already exists, exiting" ++ exit 0 ++ fi ++ fi ++} ++ ++ ++determine_servers() ++{ ++ local cmd=${1} ++ local num_servers=0 ++ local tmp_ifs=${IFS} ++ local ha_servers="" ++ ++ if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then ++ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') ++ IFS=$' ' ++ for server in ${ha_servers} ; do ++ num_servers=$(expr ${num_servers} + 1) ++ done ++ IFS=${tmp_ifs} ++ HA_NUM_SERVERS=${num_servers} ++ HA_SERVERS="${ha_servers}" ++ else ++ IFS=$',' ++ for server in ${HA_CLUSTER_NODES} ; do ++ num_servers=$(expr ${num_servers} + 1) ++ done ++ IFS=${tmp_ifs} ++ HA_NUM_SERVERS=${num_servers} ++ HA_SERVERS="${HA_CLUSTER_NODES//,/ }" ++ fi ++} ++ ++ ++setup_cluster() ++{ ++ local name=${1} ++ local num_servers=${2} ++ local servers=${3} ++ local unclean="" ++ local quorum_policy="stop" ++ ++ logger "setting up cluster ${name} with the following ${servers}" ++ ++ pcs cluster auth ${servers} ++ # pcs cluster setup --name ${name} ${servers} ++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} ++ if [ $? -ne 0 ]; then ++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" ++ exit 1; ++ fi ++ pcs cluster start --all ++ if [ $? -ne 0 ]; then ++ logger "pcs cluster start failed" ++ exit 1; ++ fi ++ ++ sleep 1 ++ # wait for the cluster to elect a DC before querying or writing ++ # to the CIB. 
BZ 1334092 ++ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 ++ while [ $? -ne 0 ]; do ++ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 ++ done ++ ++ unclean=$(pcs status | grep -u "UNCLEAN") ++ while [[ "${unclean}X" = "UNCLEANX" ]]; do ++ sleep 1 ++ unclean=$(pcs status | grep -u "UNCLEAN") ++ done ++ sleep 1 ++ ++ if [ ${num_servers} -lt 3 ]; then ++ quorum_policy="ignore" ++ fi ++ pcs property set no-quorum-policy=${quorum_policy} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" ++ fi ++ ++ pcs property set stonith-enabled=false ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs property set stonith-enabled=false failed" ++ fi ++} ++ ++ ++setup_finalize_ha() ++{ ++ local cibfile=${1} ++ local stopped="" ++ ++ stopped=$(pcs status | grep -u "Stopped") ++ while [[ "${stopped}X" = "StoppedX" ]]; do ++ sleep 1 ++ stopped=$(pcs status | grep -u "Stopped") ++ done ++} ++ ++ ++refresh_config () ++{ ++ local short_host=$(hostname -s) ++ local VOL=${1} ++ local HA_CONFDIR=${2} ++ local short_host=$(hostname -s) ++ ++ local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ ++ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') ++ ++ ++ if [ -e ${SECRET_PEM} ]; then ++ while [[ ${3} ]]; do ++ current_host=`echo ${3} | cut -d "." -f 1` ++ if [ ${short_host} != ${current_host} ]; then ++ output=$(ssh -oPasswordAuthentication=no \ ++-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ ++"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ ++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ ++string:$HA_CONFDIR/exports/export.$VOL.conf \ ++string:\"EXPORT(Export_Id=$export_id)\" 2>&1") ++ ret=$? ++ logger <<< "${output}" ++ if [ ${ret} -ne 0 ]; then ++ echo "Error: refresh-config failed on ${current_host}." ++ exit 1 ++ else ++ echo "Refresh-config completed on ${current_host}." ++ fi ++ ++ fi ++ shift ++ done ++ else ++ echo "Error: refresh-config failed. Passwordless ssh is not enabled." ++ exit 1 ++ fi ++ ++ # Run the same command on the localhost, ++ output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ ++/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ ++string:$HA_CONFDIR/exports/export.$VOL.conf \ ++string:"EXPORT(Export_Id=$export_id)" 2>&1) ++ ret=$? ++ logger <<< "${output}" ++ if [ ${ret} -ne 0 ] ; then ++ echo "Error: refresh-config failed on localhost." ++ exit 1 ++ else ++ echo "Success: refresh-config completed." ++ fi ++} ++ ++ ++teardown_cluster() ++{ ++ local name=${1} ++ ++ for server in ${HA_SERVERS} ; do ++ if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then ++ logger "info: ${server} is not in config, removing" ++ ++ pcs cluster stop ${server} --force ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster stop ${server} failed" ++ fi ++ ++ pcs cluster node remove ${server} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster node remove ${server} failed" ++ fi ++ fi ++ done ++ ++ # BZ 1193433 - pcs doesn't reload cluster.conf after modification ++ # after teardown completes, a subsequent setup will appear to have ++ # 'remembered' the deleted node. You can work around this by ++ # issuing another `pcs cluster node remove $node`, ++ # `crm_node -f -R $server`, or ++ # `cibadmin --delete --xml-text '<node id="$server" ++ # uname="$server"/>' ++ ++ pcs cluster stop --all ++ if [ $? -ne 0 ]; then ++ logger "warning pcs cluster stop --all failed" ++ fi ++ ++ pcs cluster destroy ++ if [ $? 
-ne 0 ]; then ++ logger "error pcs cluster destroy failed" ++ exit 1 ++ fi ++} ++ ++ ++cleanup_ganesha_config () ++{ ++ rm -f /etc/corosync/corosync.conf ++ rm -rf /etc/cluster/cluster.conf* ++ rm -rf /var/lib/pacemaker/cib/* ++} ++ ++do_create_virt_ip_constraints() ++{ ++ local cibfile=${1}; shift ++ local primary=${1}; shift ++ local weight="1000" ++ ++ # first a constraint location rule that says the VIP must be where ++ # there's a ganesha.nfsd running ++ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" ++ fi ++ ++ # then a set of constraint location prefers to set the prefered order ++ # for where a VIP should move ++ while [[ ${1} ]]; do ++ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" ++ fi ++ weight=$(expr ${weight} + 1000) ++ shift ++ done ++ # and finally set the highest preference for the VIP to its home node ++ # default weight when created is/was 100. ++ # on Fedora setting appears to be additive, so to get the desired ++ # value we adjust the weight ++ # weight=$(expr ${weight} - 100) ++ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" ++ fi ++} ++ ++ ++wrap_create_virt_ip_constraints() ++{ ++ local cibfile=${1}; shift ++ local primary=${1}; shift ++ local head="" ++ local tail="" ++ ++ # build a list of peers, e.g. for a four node cluster, for node1, ++ # the result is "node2 node3 node4"; for node2, "node3 node4 node1" ++ # and so on. ++ while [[ ${1} ]]; do ++ if [ "${1}" = "${primary}" ]; then ++ shift ++ while [[ ${1} ]]; do ++ tail=${tail}" "${1} ++ shift ++ done ++ else ++ head=${head}" "${1} ++ fi ++ shift ++ done ++ do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head} ++} ++ ++ ++create_virt_ip_constraints() ++{ ++ local cibfile=${1}; shift ++ ++ while [[ ${1} ]]; do ++ wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS} ++ shift ++ done ++} ++ ++ ++setup_create_resources() ++{ ++ local cibfile=$(mktemp -u) ++ ++ # fixup /var/lib/nfs ++ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone" ++ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" ++ fi ++ ++ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed" ++ fi ++ ++ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace ++ # start method. Allow time for ganesha_mon to start and set the ++ # ganesha-active crm_attribute ++ sleep 5 ++ ++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" ++ fi ++ ++ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 ++ if [ $? 
-ne 0 ]; then ++ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1" ++ fi ++ ++ pcs cluster cib ${cibfile} ++ ++ while [[ ${1} ]]; do ++ ++ # this is variable indirection ++ # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"' ++ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...) ++ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1') ++ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"') ++ # after the `eval ${clean_nvs}` there is a variable VIP_host_1 ++ # with the value '10_7_6_5', and the following \$$ magic to ++ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us ++ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s ++ # to give us ipaddr="10.7.6.5". whew! ++ name="VIP_${1}" ++ clean_name=${name//[-.]/_} ++ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) ++ clean_nvs=${nvs//[-.]/_} ++ eval ${clean_nvs} ++ eval tmp_ipaddr=\$${clean_name} ++ ipaddr=${tmp_ipaddr//_/.} ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=block ip=${ipaddr} --group ${1}-group ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_block failed" ++ fi ++ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s failed" ++ fi ++ ++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" ++ fi ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ ++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ ++ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ ++ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_unblock failed" ++ fi ++ ++ ++ shift ++ done ++ ++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS} ++ ++ pcs cluster cib-push ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning pcs cluster cib-push ${cibfile} failed" ++ fi ++ rm -f ${cibfile} ++} ++ ++ ++teardown_resources() ++{ ++ # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2) ++ ++ # restore /var/lib/nfs ++ logger "notice: pcs resource delete nfs_setup-clone" ++ pcs resource delete nfs_setup-clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource delete nfs_setup-clone failed" ++ fi ++ ++ # delete -clone resource agents ++ # in particular delete the ganesha monitor so we don't try to ++ # trigger anything when we shut down ganesha next. ++ pcs resource delete nfs-mon-clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource delete nfs-mon-clone failed" ++ fi ++ ++ pcs resource delete nfs-grace-clone ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs resource delete nfs-grace-clone failed" ++ fi ++ ++ while [[ ${1} ]]; do ++ pcs resource delete ${1}-group ++ if [ $? 
-ne 0 ]; then ++ logger "warning: pcs resource delete ${1}-group failed" ++ fi ++ shift ++ done ++ ++} ++ ++ ++recreate_resources() ++{ ++ local cibfile=${1}; shift ++ ++ while [[ ${1} ]]; do ++ # this is variable indirection ++ # see the comment on the same a few lines up ++ name="VIP_${1}" ++ clean_name=${name//[-.]/_} ++ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) ++ clean_nvs=${nvs//[-.]/_} ++ eval ${clean_nvs} ++ eval tmp_ipaddr=\$${clean_name} ++ ipaddr=${tmp_ipaddr//_/.} ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=block ip=${ipaddr} --group ${1}-group ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_block failed" ++ fi ++ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ ++ cidr_netmask=32 op monitor interval=15s failed" ++ fi ++ ++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" ++ fi ++ ++ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ ++ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ ++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ ++ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ ++ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${1}-nfs_unblock failed" ++ fi ++ ++ shift ++ done ++} ++ ++ ++addnode_recreate_resources() ++{ ++ local cibfile=${1}; shift ++ local add_node=${1}; shift ++ local add_vip=${1}; shift ++ ++ recreate_resources ${cibfile} ${HA_SERVERS} ++ ++ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ ++ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${add_node}-nfs_block failed" ++ fi ++ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ ++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ ++ --after ${add_node}-nfs_block ++ if [ $? -ne 0 ]; then ++ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ ++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" ++ fi ++ ++ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" ++ fi ++ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ ++ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ ++ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ ++ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ ++ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ ++ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} ++ if [ $? 
-ne 0 ]; then ++ logger "warning pcs resource create ${add_node}-nfs_unblock failed" ++ fi ++} ++ ++ ++clear_resources() ++{ ++ local cibfile=${1}; shift ++ ++ while [[ ${1} ]]; do ++ pcs -f ${cibfile} resource delete ${1}-group ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs -f ${cibfile} resource delete ${1}-group" ++ fi ++ ++ shift ++ done ++} ++ ++ ++addnode_create_resources() ++{ ++ local add_node=${1}; shift ++ local add_vip=${1}; shift ++ local cibfile=$(mktemp -u) ++ ++ # start HA on the new node ++ pcs cluster start ${add_node} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster start ${add_node} failed" ++ fi ++ ++ pcs cluster cib ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib ${cibfile} failed" ++ fi ++ ++ # delete all the -cluster_ip-1 resources, clearing ++ # their constraints, then create them again so we can ++ # recompute their constraints ++ clear_resources ${cibfile} ${HA_SERVERS} ++ addnode_recreate_resources ${cibfile} ${add_node} ${add_vip} ++ ++ HA_SERVERS="${HA_SERVERS} ${add_node}" ++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS} ++ ++ pcs cluster cib-push ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib-push ${cibfile} failed" ++ fi ++ rm -f ${cibfile} ++} ++ ++ ++deletenode_delete_resources() ++{ ++ local node=${1}; shift ++ local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//) ++ local cibfile=$(mktemp -u) ++ ++ pcs cluster cib ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib ${cibfile} failed" ++ fi ++ ++ # delete all the -cluster_ip-1 and -trigger_ip-1 resources, ++ # clearing their constraints, then create them again so we can ++ # recompute their constraints ++ clear_resources ${cibfile} ${HA_SERVERS} ++ recreate_resources ${cibfile} ${ha_servers} ++ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /") ++ ++ create_virt_ip_constraints ${cibfile} ${HA_SERVERS} ++ ++ pcs cluster cib-push ${cibfile} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster cib-push ${cibfile} failed" ++ fi ++ rm -f ${cibfile} ++ ++} ++ ++ ++deletenode_update_haconfig() ++{ ++ local name="VIP_${1}" ++ local clean_name=${name//[-.]/_} ++ ++ ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") ++ sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf ++} ++ ++ ++setup_state_volume() ++{ ++ local mnt=${HA_VOL_MNT} ++ local longname="" ++ local shortname="" ++ local dname="" ++ local dirname="" ++ ++ longname=$(hostname) ++ dname=${longname#$(hostname -s)} ++ ++ while [[ ${1} ]]; do ++ ++ if [[ ${1} == *${dname} ]]; then ++ dirname=${1} ++ else ++ dirname=${1}${dname} ++ fi ++ ++ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then ++ mkdir ${mnt}/nfs-ganesha/tickle_dir ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname} ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ fi ++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ++ fi ++ if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ fi ++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ++ fi ++ for server in ${HA_SERVERS} ; do ++ if [ ${server} != ${dirname} ]; then ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} ++ fi ++ done ++ shift ++ done ++ ++} ++ ++ ++addnode_state_volume() ++{ ++ local newnode=${1}; shift ++ local mnt=${HA_VOL_MNT} ++ local longname="" ++ local dname="" ++ local dirname="" ++ ++ longname=$(hostname) ++ dname=${longname#$(hostname -s)} ++ ++ if [[ ${newnode} == *${dname} ]]; then ++ dirname=${newnode} ++ else ++ dirname=${newnode}${dname} ++ fi ++ ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname} ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ fi ++ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ fi ++ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then ++ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ fi ++ if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then ++ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ++ fi ++ ++ for server in ${HA_SERVERS} ; do ++ ++ if [[ ${server} != ${dirname} ]]; then ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} ++ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} ++ ++ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} ++ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} ++ fi ++ done ++ ++} ++ ++ ++delnode_state_volume() ++{ ++ local delnode=${1}; shift ++ local mnt=${HA_VOL_MNT} ++ local longname="" ++ local dname="" ++ local dirname="" ++ ++ longname=$(hostname) ++ dname=${longname#$(hostname -s)} ++ ++ if [[ ${delnode} == *${dname} ]]; then ++ dirname=${delnode} ++ else ++ dirname=${delnode}${dname} ++ fi ++ ++ rm -rf ${mnt}/nfs-ganesha/${dirname} ++ ++ for server in ${HA_SERVERS} ; do ++ if [[ "${server}" != "${dirname}" ]]; then ++ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} ++ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} ++ fi ++ done ++} ++ ++ ++status() ++{ ++ local scratch=$(mktemp) ++ local regex_str="^${1}-cluster_ip-1" ++ local healthy=0 ++ local index=1 ++ local nodes ++ ++ # change tabs to spaces, strip leading spaces ++ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch} ++ ++ nodes[0]=${1}; shift ++ ++ # make a regex of the configured nodes ++ # and initialize the nodes array for later ++ while [[ ${1} ]]; do ++ ++ regex_str="${regex_str}|^${1}-cluster_ip-1" ++ nodes[${index}]=${1} ++ ((index++)) ++ shift ++ done ++ ++ # print the nodes that are expected to be online ++ grep -E "^Online:" ${scratch} ++ ++ echo ++ ++ # print the VIPs and which node they are on ++ grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 ++ ++ echo ++ ++ # check if the VIP and port block/unblock RAs are on the expected nodes ++ for n in ${nodes[*]}; do ++ ++ grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ((healthy+=${result})) ++ grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ((healthy+=${result})) ++ grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ((healthy+=${result})) ++ done ++ ++ grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} ++ result=$? ++ ++ if [ ${result} -eq 0 ]; then ++ echo "Cluster HA Status: BAD" ++ elif [ ${healthy} -eq 0 ]; then ++ echo "Cluster HA Status: HEALTHY" ++ else ++ echo "Cluster HA Status: FAILOVER" ++ fi ++ ++ rm -f ${scratch} ++} ++ ++create_ganesha_conf_file() ++{ ++ if [ $1 == "yes" ]; ++ then ++ if [ -e $GANESHA_CONF ]; ++ then ++ rm -rf $GANESHA_CONF ++ fi ++ # The symlink /etc/ganesha/ganesha.conf needs to be ++ # created using the ganesha conf file kept on the ++ # shared storage. Every node will have only this ++ # link, while the actual file is stored in shared storage, ++ # so that editing the ganesha conf is ++ # easier as well as more consistent.
++ ++ ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF ++ else ++ # Restoring previous file ++ rm -rf $GANESHA_CONF ++ cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF ++ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF ++ fi ++} ++ ++set_quorum_policy() ++{ ++ local quorum_policy="stop" ++ local num_servers=${1} ++ ++ if [ ${num_servers} -lt 3 ]; then ++ quorum_policy="ignore" ++ fi ++ pcs property set no-quorum-policy=${quorum_policy} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" ++ fi ++} ++ ++main() ++{ ++ ++ local cmd=${1}; shift ++ if [[ ${cmd} == *help ]]; then ++ usage ++ exit 0 ++ fi ++ HA_CONFDIR=${1%/}; shift ++ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf ++ local node="" ++ local vip="" ++ ++ # ignore any comment lines ++ cfgline=$(grep ^HA_NAME= ${ha_conf}) ++ eval $(echo ${cfgline} | grep -F HA_NAME=) ++ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) ++ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) ++ ++ case "${cmd}" in ++ ++ setup | --setup) ++ logger "setting up ${HA_NAME}" ++ ++ check_cluster_exists ${HA_NAME} ++ ++ determine_servers "setup" ++ ++ if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then ++ ++ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" ++ ++ setup_create_resources ${HA_SERVERS} ++ ++ setup_finalize_ha ++ ++ setup_state_volume ${HA_SERVERS} ++ ++ else ++ ++ logger "insufficient servers for HA, aborting" ++ fi ++ ;; ++ ++ teardown | --teardown) ++ logger "tearing down ${HA_NAME}" ++ ++ determine_servers "teardown" ++ ++ teardown_resources ${HA_SERVERS} ++ ++ teardown_cluster ${HA_NAME} ++ ++ cleanup_ganesha_config ${HA_CONFDIR} ++ ;; ++ ++ cleanup | --cleanup) ++ cleanup_ganesha_config ${HA_CONFDIR} ++ ;; ++ ++ add | --add) ++ node=${1}; shift ++ vip=${1}; shift ++ ++ logger "adding ${node} with ${vip} to ${HA_NAME}" ++ ++ determine_service_manager ++ ++ manage_service "start" ${node} ++ ++ determine_servers "add" ++ ++ pcs cluster node add ${node} ++ if [ $? -ne 0 ]; then ++ logger "warning: pcs cluster node add ${node} failed" ++ fi ++ ++ addnode_create_resources ${node} ${vip} ++ # Subsequent add-node recreates resources for all the nodes ++ # that already exist in the cluster. The nodes are picked up ++ # from the entries in the ganesha-ha.conf file. Add the ++ # newly added node to the file so that the resources specific ++ # to this node are correctly recreated in the future. ++ clean_node=${node//[-.]/_} ++ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf ++ ++ NEW_NODES="$HA_CLUSTER_NODES,${node}" ++ ++ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ ++$HA_CONFDIR/ganesha-ha.conf ++ ++ addnode_state_volume ${node} ++ ++ # addnode_create_resources() already appended ${node} to ++ # HA_SERVERS, so only need to increment HA_NUM_SERVERS ++ # and set quorum policy ++ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) ++ set_quorum_policy ${HA_NUM_SERVERS} ++ ;; ++ ++ delete | --delete) ++ node=${1}; shift ++ ++ logger "deleting ${node} from ${HA_NAME}" ++ ++ determine_servers "delete" ++ ++ deletenode_delete_resources ${node} ++ ++ pcs cluster node remove ${node} ++ if [ $?
-ne 0 ]; then ++ logger "warning: pcs cluster node remove ${node} failed" ++ fi ++ ++ deletenode_update_haconfig ${node} ++ ++ delnode_state_volume ${node} ++ ++ determine_service_manager ++ ++ manage_service "stop" ${node} ++ ++ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) ++ set_quorum_policy ${HA_NUM_SERVERS} ++ ;; ++ ++ status | --status) ++ determine_servers "status" ++ ++ status ${HA_SERVERS} ++ ;; ++ ++ refresh-config | --refresh-config) ++ VOL=${1} ++ ++ determine_servers "refresh-config" ++ ++ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS} ++ ;; ++ ++ setup-ganesha-conf-files | --setup-ganesha-conf-files) ++ ++ create_ganesha_conf_file ${1} ++ ;; ++ ++ *) ++ # setup and teardown are not intended to be used by a ++ # casual user ++ usage ++ logger "Usage: ganesha-ha.sh add|delete|status" ++ ;; ++ ++ esac ++} ++ ++main $* +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index dd7438c..d748ebc 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -460,7 +460,8 @@ Summary: NFS-Ganesha configuration + Group: Applications/File + + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: nfs-ganesha-gluster, pcs, dbus ++Requires: nfs-ganesha-gluster >= 2.4.1 ++Requires: pcs, dbus + %if ( 0%{?rhel} && 0%{?rhel} == 6 ) + Requires: cman, pacemaker, corosync + %endif +@@ -1138,6 +1139,7 @@ exit 0 + #exclude ganesha related files + %exclude %{_sysconfdir}/ganesha/* + %exclude %{_libexecdir}/ganesha/* ++%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif + + %files api +@@ -1306,6 +1308,7 @@ exit 0 + %files ganesha + %{_sysconfdir}/ganesha/* + %{_libexecdir}/ganesha/* ++%{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif + + %if ( 0%{!?_without_ocf:1} ) +@@ -1904,6 +1907,9 @@ fi + %endif + + %changelog ++* Sat Apr 6 2019 Jiffin Tony Thottan <jthottan@redhat.com> ++- Adding ganesha ha resources back in gluster repository ++ + * Fri Apr 5 2019 Jiffin Tony Thottan <jthottan@redhat.com> + - Adding ganesha bits back in gluster repository + +-- +1.8.3.1 + diff --git a/SOURCES/0056-common-ha-fixes-for-Debian-based-systems.patch b/SOURCES/0056-common-ha-fixes-for-Debian-based-systems.patch new file mode 100644 index 0000000..fef23b1 --- /dev/null +++ b/SOURCES/0056-common-ha-fixes-for-Debian-based-systems.patch @@ -0,0 +1,229 @@ +From 2c1a83920b959a1ec170243d1eec71b1e2c074b0 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Fri, 7 Apr 2017 09:09:29 -0400 +Subject: [PATCH 056/124] common-ha: fixes for Debian-based systems + +1) Debian-based systems don't have /usr/libexec/... and there is +a hard-coded invocation of /usr/libexec/ganesha/ganesha-ha.sh within +ganesha-ha.sh itself. +Fix: save $0 and use it instead for further invocations of self. + +2) default shell is /bin/dash (not /bin/bash). Various runner_run() +invocations for ganesha used what amounts to + exec("sh /usr/$libexec/ganesha/ganesha-ha.sh ...); +which executes the script using the default shell, but there are +some bash-specific idioms that don't work if the shell is dash. +Fix: change to exec("/usr/$libexec/ganesha/ganesha-ha.sh ...); so that +the shebang forces the use of /bin/bash + +3) Fedora and RHEL7 have merged /bin/ and /usr/bin, /bin is a symlink +to /usr/bin. Debian-based systems are not merged, and systemd systems +have /bin/systemctl. The logic to find .../bin/systemctl is backwards. 
+If the logic looks for /usr/bin/systemctl it will not find it on +Debian-based systems; if it looks for /bin/systemctl it will find it +on Fedora and RHEL by virtue of the symlink. (RHEL6 and others will +find their respective init regardless.) +Fix: change the logic to look for /bin/systemctl instead. + +4) The logic for deciding to run systemctl (or not) is a bit silly. +Fix: simply invoke the found method via the function pointer in the +table. + +Label: DOWNSTREAM ONLY + +Change-Id: I33681b296a73aebb078bda6ac0d3a1d3b9770a21 +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://review.gluster.org/17013 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Niels de Vos <ndevos@redhat.com> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: jiffin tony Thottan <jthottan@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167141 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 21 +++++++++--------- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 32 +++++++++++----------------- + 2 files changed, 23 insertions(+), 30 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 6b011be..4b93f95 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -20,6 +20,7 @@ + # ensure that the NFS GRACE DBUS signal is sent after the VIP moves to + # the new host. + ++GANESHA_HA_SH=$(realpath $0) + HA_NUM_SERVERS=0 + HA_SERVERS="" + HA_VOL_NAME="gluster_shared_storage" +@@ -68,9 +69,9 @@ function find_rhel7_conf + done + } + +-if [ -z $CONFFILE ] ++if [ -z ${CONFFILE} ] + then +- find_rhel7_conf $OPTIONS ++ find_rhel7_conf ${OPTIONS} + + fi + +@@ -90,9 +91,9 @@ usage() { + + determine_service_manager () { + +- if [ -e "/usr/bin/systemctl" ]; ++ if [ -e "/bin/systemctl" ]; + then +- SERVICE_MAN="/usr/bin/systemctl" ++ SERVICE_MAN="/bin/systemctl" + elif [ -e "/sbin/invoke-rc.d" ]; + then + SERVICE_MAN="/sbin/invoke-rc.d" +@@ -100,7 +101,7 @@ determine_service_manager () { + then + SERVICE_MAN="/sbin/service" + fi +- if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] ++ if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ] + then + echo "Service manager not recognized, exiting" + exit 1 +@@ -113,21 +114,21 @@ manage_service () + local new_node=${2} + local option= + +- if [ "$action" == "start" ]; then ++ if [ "${action}" == "start" ]; then + option="yes" + else + option="no" + fi + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +-${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option" ++${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + +- if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ] ++ if [ "${SERVICE_MAN}" == "/bin/systemctl" ] + then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +-${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha" ++${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +-${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" ++${SECRET_PEM} root@${new_node} "${SERVICE_MAN} 
nfs-ganesha ${action}" + fi + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index fac16e6..81f794d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -122,12 +122,9 @@ sc_service_action(struct service_command *sc, char *command) + static int + manage_service(char *action) + { +- struct stat stbuf = { +- 0, +- }; + int i = 0; + int ret = 0; +- struct service_command sc_list[] = {{.binary = "/usr/bin/systemctl", ++ struct service_command sc_list[] = {{.binary = "/bin/systemctl", + .service = "nfs-ganesha", + .action = sc_systemctl_action}, + {.binary = "/sbin/invoke-rc.d", +@@ -139,15 +136,10 @@ manage_service(char *action) + {.binary = NULL}}; + + while (sc_list[i].binary != NULL) { +- ret = sys_stat(sc_list[i].binary, &stbuf); ++ ret = sys_access(sc_list[i].binary, X_OK); + if (ret == 0) { + gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary); +- if (strcmp(sc_list[i].binary, "/usr/bin/systemctl") == 0) +- ret = sc_systemctl_action(&sc_list[i], action); +- else +- ret = sc_service_action(&sc_list[i], action); +- +- return ret; ++ return sc_list[i].action(&sc_list[i], action); + } + i++; + } +@@ -449,7 +441,7 @@ manage_export_config(char *volname, char *value, char **op_errstr) + + GF_ASSERT(volname); + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/create-export-ganesha.sh", ++ runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh", + CONFDIR, value, volname, NULL); + ret = runner_run(&runner); + +@@ -558,8 +550,8 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) + } + + if (check_host_list()) { +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/dbus-send.sh", CONFDIR, +- value, volname, NULL); ++ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, value, ++ volname, NULL); + ret = runner_run(&runner); + if (ret) { + gf_asprintf(op_errstr, +@@ -610,8 +602,8 @@ tear_down_cluster(gf_boolean_t run_teardown) + + if (run_teardown) { + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", +- "teardown", CONFDIR, NULL); ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown", ++ CONFDIR, NULL); + ret = runner_run(&runner); + /* * + * Remove all the entries in CONFDIR expect ganesha.conf and +@@ -685,7 +677,7 @@ setup_cluster(gf_boolean_t run_setup) + + if (run_setup) { + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "setup", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup", + CONFDIR, NULL); + ret = runner_run(&runner); + } +@@ -714,7 +706,7 @@ teardown(gf_boolean_t run_teardown, char **op_errstr) + } + + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup", + CONFDIR, NULL); + ret = runner_run(&runner); + if (ret) +@@ -759,7 +751,7 @@ stop_ganesha(char **op_errstr) + }; + + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", + "--setup-ganesha-conf-files", CONFDIR, "no", NULL); + ret = runner_run(&runner); + if (ret) { +@@ -828,7 +820,7 @@ start_ganesha(char **op_errstr) + + if (check_host_list()) { + runinit(&runner); +- runner_add_args(&runner, "sh", GANESHA_PREFIX "/ganesha-ha.sh", ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", + "--setup-ganesha-conf-files", CONFDIR, 
"yes", NULL); + ret = runner_run(&runner); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch b/SOURCES/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch new file mode 100644 index 0000000..996e2d0 --- /dev/null +++ b/SOURCES/0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch @@ -0,0 +1,40 @@ +From 16d298584c70138fd639281bc900838d7938aec9 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 22 Feb 2017 14:37:04 +0530 +Subject: [PATCH 057/124] ganesha/scripts : Remove export entries from + ganesha.conf during cleanup + +Label: DOWNSTREAM ONLY + +Change-Id: I288f7c9ced23d258a7ce1242d8efe03a4bf6f746 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/16708 +Smoke: Gluster Build System <jenkins@build.gluster.org> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167142 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4b93f95..7ba80b5 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -342,6 +342,7 @@ cleanup_ganesha_config () + rm -f /etc/corosync/corosync.conf + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* ++ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf + } + + do_create_virt_ip_constraints() +-- +1.8.3.1 + diff --git a/SOURCES/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch b/SOURCES/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch new file mode 100644 index 0000000..251e78d --- /dev/null +++ b/SOURCES/0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch @@ -0,0 +1,62 @@ +From 172f32058b1a7d2e42f373490853aef5dd72f02f Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 22 Feb 2017 14:20:41 +0530 +Subject: [PATCH 058/124] glusterd/ganesha : During volume delete remove the + ganesha export configuration file + +Label: DOWNSTREAM ONLY + +Change-Id: I0363e7f4d7cefd3f1b3c4f91e495767ec52e230e +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/16707 +Smoke: Gluster Build System <jenkins@build.gluster.org> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167143 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 9 +++++++++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git 
a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 81f794d..6d72fda 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -445,7 +445,7 @@ manage_export_config(char *volname, char *value, char **op_errstr) + CONFDIR, value, volname, NULL); + ret = runner_run(&runner); + +- if (ret) ++ if (ret && !(*op_errstr)) + gf_asprintf(op_errstr, + "Failed to create" + " NFS-Ganesha export config file."); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index a0417ca..81c668c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2936,6 +2936,15 @@ glusterd_op_delete_volume(dict_t *dict) + goto out; + } + ++ if (glusterd_check_ganesha_export(volinfo)) { ++ ret = manage_export_config(volname, "off", NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, 0, ++ "Could not delete ganesha export conf file " ++ "for %s", ++ volname); ++ } ++ + ret = glusterd_delete_volume(volinfo); + out: + gf_msg_debug(this->name, 0, "returning %d", ret); +-- +1.8.3.1 + diff --git a/SOURCES/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch b/SOURCES/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch new file mode 100644 index 0000000..e41a178 --- /dev/null +++ b/SOURCES/0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch @@ -0,0 +1,132 @@ +From 8b501d9dfbeecb3ffdc3cd11b7c74aa929356ed6 Mon Sep 17 00:00:00 2001 +From: jiffin tony thottan <jthottan@redhat.com> +Date: Mon, 7 Dec 2015 14:38:54 +0530 +Subject: [PATCH 059/124] glusterd/ganesha : throw proper error for "gluster + nfs-ganesha disable" + +For the first time, or if "gluster nfs-ganesha enable" fails, the global option +"nfs-ganesha" won't be stored in glusterd's dictionary. In both cases the +"gluster nfs-ganesha disable" command throws the following error: +"nfs-ganesha: failed: nfs-ganesha is already (null)d." + +Also this patch provides the missing prompt for nfs-ganesha disable in 3.10 + +Label: DOWNSTREAM ONLY + +Change-Id: I7c9fd6dabedc0cfb14c5190b3554bc63a6bc0340 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/16791 +Smoke: Gluster Build System <jenkins@build.gluster.org> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167144 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 33 +++++++++++++++++----------- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 22 +++++-------------- + 2 files changed, 26 insertions(+), 29 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index cd9c445..f85958b 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -5908,20 +5908,27 @@ cli_cmd_ganesha_parse(struct cli_state *state, const char **words, + goto out; + } + +- question = +- "Enabling NFS-Ganesha requires Gluster-NFS to be" +- " disabled across the trusted pool.
Do you " +- "still want to continue?\n"; +- + if (strcmp(value, "enable") == 0) { +- answer = cli_cmd_get_confirmation(state, question); +- if (GF_ANSWER_NO == answer) { +- gf_log("cli", GF_LOG_ERROR, +- "Global operation " +- "cancelled, exiting"); +- ret = -1; +- goto out; +- } ++ question = ++ "Enabling NFS-Ganesha requires Gluster-NFS to be " ++ "disabled across the trusted pool. Do you " ++ "still want to continue?\n"; ++ } else if (strcmp(value, "disable") == 0) { ++ question = ++ "Disabling NFS-Ganesha will tear down the entire " ++ "ganesha cluster across the trusted pool. Do you " ++ "still want to continue?\n"; ++ } else { ++ ret = -1; ++ goto out; ++ } ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Global operation " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; + } + cli_out("This will take a few minutes to complete. Please wait .."); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 6d72fda..1d17a33 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -252,8 +252,7 @@ int + glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) + { + int ret = -1; +- int value = -1; +- gf_boolean_t option = _gf_false; ++ char *value = NULL; + char *str = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; +@@ -264,8 +263,8 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) + priv = this->private; + GF_ASSERT(priv); + +- value = dict_get_str_boolean(dict, "value", _gf_false); +- if (value == -1) { ++ ret = dict_get_str(dict, "value", &value); ++ if (value == NULL) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, + "value not present."); + goto out; +@@ -273,22 +272,13 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr) + /* This dict_get will fail if the user had never set the key before */ + /*Ignoring the ret value and proceeding */ + ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str); +- if (ret == -1) { +- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_GET_FAILED, +- "Global dict not present."); +- ret = 0; +- goto out; +- } +- /* Validity of the value is already checked */ +- ret = gf_string2boolean(str, &option); +- /* Check if the feature is already enabled, fail in that case */ +- if (value == option) { +- gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", str); ++ if (str ? 
strcmp(value, str) == 0 : strcmp(value, "disable") == 0) { ++ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value); + ret = -1; + goto out; + } + +- if (value) { ++ if (strcmp(value, "enable")) { + ret = start_ganesha(op_errstr); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL, +-- +1.8.3.1 + diff --git a/SOURCES/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch b/SOURCES/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch new file mode 100644 index 0000000..39202ca --- /dev/null +++ b/SOURCES/0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch @@ -0,0 +1,61 @@ +From 93635333d17a03078a6bf72771445e1bd9ebdc15 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Thu, 2 Mar 2017 12:22:30 +0530 +Subject: [PATCH 060/124] ganesha/scripts : Stop ganesha process on all nodes + if cluster setup fails + +During the staging phase of the volume option "nfs-ganesha", the symlink "ganesha.conf" +will be created and the ganesha process will be started. The cluster setup +happens during the commit phase of that option. So if cluster setup fails, the +ganesha process will be left running on all cluster nodes. + +Label: DOWNSTREAM ONLY + +Change-Id: Ib2cb85364b7ef5b702acb4826ffdf8e6f31a2acd +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/16823 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Tested-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Reviewed-by: soumya k <skoduri@redhat.com> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167145 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 7ba80b5..db3f921 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -175,6 +175,13 @@ determine_servers() + fi + } + ++stop_ganesha_all() ++{ ++ local serverlist=${1} ++ for node in ${serverlist} ; do ++ manage_service "stop" ${node} ++ done ++} + + setup_cluster() + { +@@ -191,6 +198,8 @@ setup_cluster() + pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} + if [ $?
-ne 0 ]; then + logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" ++ #set up failed stop all ganesha process and clean up symlinks in cluster ++ stop_ganesha_all ${servers} + exit 1; + fi + pcs cluster start --all +-- +1.8.3.1 + diff --git a/SOURCES/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch b/SOURCES/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch new file mode 100644 index 0000000..610c471 --- /dev/null +++ b/SOURCES/0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch @@ -0,0 +1,106 @@ +From a766878e11a984680ed29f13aae713d464ec985e Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 19 Apr 2017 16:12:10 +0530 +Subject: [PATCH 061/124] ganesha : allow refresh-config and volume + export/unexport in failover state + +If ganesha is not running on one of nodes in HA cluster, then alli dbus +commands send to that ganesha server will fail. This results in both +refresh-config and volume export/unepxort failure. This change will +gracefully handle those scenarios. + +Label: DOWNSTREAM ONLY + +Change-Id: I3f1b7b7ca98e54c273c266e56357d8e24dd1b14b +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/17081 +Smoke: Gluster Build System <jenkins@build.gluster.org> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167146 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 6 ++---- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 24 +++++++++++++++--------- + xlators/mgmt/glusterd/src/glusterd-messages.h | 2 +- + 3 files changed, 18 insertions(+), 14 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index db3f921..f040ef6 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -275,8 +275,7 @@ string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then +- echo "Error: refresh-config failed on ${current_host}." +- exit 1 ++ echo "Refresh-config failed on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." + fi +@@ -297,8 +296,7 @@ string:"EXPORT(Export_Id=$export_id)" 2>&1) + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ] ; then +- echo "Error: refresh-config failed on localhost." +- exit 1 ++ echo "Refresh-config failed on localhost." + else + echo "Success: refresh-config completed." + fi +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 1d17a33..ee8b588 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -540,15 +540,21 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) + } + + if (check_host_list()) { +- runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, value, +- volname, NULL); +- ret = runner_run(&runner); +- if (ret) { +- gf_asprintf(op_errstr, +- "Dynamic export" +- " addition/deletion failed." 
+- " Please see log file for details"); +- goto out; ++ /* Check whether ganesha is running on this node */ ++ if (manage_service("status")) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GANESHA_NOT_RUNNING, ++ "Export failed, NFS-Ganesha is not running"); ++ } else { ++ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR, ++ value, volname, NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "Dynamic export" ++ " addition/deletion failed." ++ " Please see log file for details"); ++ goto out; ++ } + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h +index 9558480..c7b3ca8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-messages.h ++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h +@@ -298,6 +298,6 @@ GLFS_MSGID( + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, + GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED, +- GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); ++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +-- +1.8.3.1 + diff --git a/SOURCES/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch b/SOURCES/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch new file mode 100644 index 0000000..71b4416 --- /dev/null +++ b/SOURCES/0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch @@ -0,0 +1,59 @@ +From eb784a40a4f72e347945e0d66ac1a28389bb076c Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Fri, 28 Apr 2017 17:27:46 +0530 +Subject: [PATCH 062/124] glusterd/ganesha : perform removal of ganesha.conf on + nodes only in ganesha cluster + +Label: DOWNSTREAM ONLY + +Change-Id: I864ecd9391adf80fb1fa6ad2f9891a9ce77135e7 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/17138 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167147 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index ee8b588..b743216 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -746,17 +746,16 @@ stop_ganesha(char **op_errstr) + 0, + }; + +- runinit(&runner); +- runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", +- "--setup-ganesha-conf-files", CONFDIR, "no", NULL); +- ret = runner_run(&runner); +- if (ret) { +- gf_asprintf(op_errstr, +- "removal of symlink ganesha.conf " +- "in /etc/ganesha failed"); +- } +- + if (check_host_list()) { ++ runinit(&runner); ++ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", ++ "--setup-ganesha-conf-files", CONFDIR, "no", NULL); ++ ret = runner_run(&runner); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "removal of symlink ganesha.conf " ++ "in 
/etc/ganesha failed"); ++ } + ret = manage_service("stop"); + if (ret) + gf_asprintf(op_errstr, +-- +1.8.3.1 + diff --git a/SOURCES/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch b/SOURCES/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch new file mode 100644 index 0000000..7bbd920 --- /dev/null +++ b/SOURCES/0063-glusterd-ganesha-update-cache-invalidation-properly-.patch @@ -0,0 +1,144 @@ +From e5450c639915f4c29ae2ad480e4128b5845254cc Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Tue, 25 Apr 2017 16:36:40 +0530 +Subject: [PATCH 063/124] glusterd/ganesha : update cache invalidation properly + during volume stop + +As per current code, during volume stop for ganesha enabled volume the +feature.cache-invalidation was turned "off" in ganesha_manage_export(). +And it never turn back to "on" when volume is started. It is not desire +to modify the volume options during stop, this patch fixes above mentioned +issue. + +Label: DOWNSTREAM ONLY + +Change-Id: Iea9c62e5cda4f54805b41ea6055cf0c3652a634c +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/17111 +Smoke: Gluster Build System <jenkins@build.gluster.org> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Reviewed-by: Raghavendra Talur <rtalur@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167148 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 33 ++++++++++++++----------- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 3 ++- + 4 files changed, 23 insertions(+), 19 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index b743216..1c2ba7a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -445,7 +445,8 @@ manage_export_config(char *volname, char *value, char **op_errstr) + + /* Exports and unexports a particular volume via NFS-Ganesha */ + int +-ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) ++ganesha_manage_export(dict_t *dict, char *value, ++ gf_boolean_t update_cache_invalidation, char **op_errstr) + { + runner_t runner = { + 0, +@@ -558,19 +559,21 @@ ganesha_manage_export(dict_t *dict, char *value, char **op_errstr) + } + } + +- vol_opts = volinfo->dict; +- ret = dict_set_dynstr_with_alloc(vol_opts, "features.cache-invalidation", +- value); +- if (ret) +- gf_asprintf(op_errstr, +- "Cache-invalidation could not" +- " be set to %s.", +- value); +- ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); +- if (ret) +- gf_asprintf(op_errstr, "failed to store volinfo for %s", +- volinfo->volname); +- ++ if (update_cache_invalidation) { ++ vol_opts = volinfo->dict; ++ ret = dict_set_dynstr_with_alloc(vol_opts, ++ "features.cache-invalidation", value); ++ if (ret) ++ gf_asprintf(op_errstr, ++ "Cache-invalidation could not" ++ " be set to %s.", ++ value); ++ ret = glusterd_store_volinfo(volinfo, ++ GLUSTERD_VOLINFO_VER_AC_INCREMENT); ++ if (ret) ++ gf_asprintf(op_errstr, "failed to 
store volinfo for %s", ++ volinfo->volname); ++ } + out: + return ret; + } +@@ -867,7 +870,7 @@ glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key, + GF_ASSERT(value); + + if (strcmp(key, "ganesha.enable") == 0) { +- ret = ganesha_manage_export(dict, value, op_errstr); ++ ret = ganesha_manage_export(dict, value, _gf_true, op_errstr); + if (ret < 0) + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index a630c48..52809a8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -1178,7 +1178,7 @@ glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr) + + if ((strcmp(key, "ganesha.enable") == 0) && + (strcmp(value, "off") == 0)) { +- ret = ganesha_manage_export(dict, "off", op_errstr); ++ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr); + if (ret) + goto out; + } +@@ -1691,7 +1691,7 @@ glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr) + */ + if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) { + if (glusterd_check_ganesha_export(volinfo)) { +- ret = ganesha_manage_export(dict, "off", op_errstr); ++ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, + "Could not reset ganesha.enable key"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 81c668c..de4eccb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1825,7 +1825,7 @@ glusterd_op_stage_stop_volume(dict_t *dict, char **op_errstr) + + ret = glusterd_check_ganesha_export(volinfo); + if (ret) { +- ret = ganesha_manage_export(dict, "off", op_errstr); ++ ret = ganesha_manage_export(dict, "off", _gf_false, op_errstr); + if (ret) { + gf_msg(THIS->name, GF_LOG_WARNING, 0, + GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 5135181..e858ce4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -1368,7 +1368,8 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr); + int + glusterd_op_set_ganesha(dict_t *dict, char **errstr); + int +-ganesha_manage_export(dict_t *dict, char *value, char **op_errstr); ++ganesha_manage_export(dict_t *dict, char *value, ++ gf_boolean_t update_cache_invalidation, char **op_errstr); + gf_boolean_t + glusterd_is_ganesha_cluster(); + gf_boolean_t +-- +1.8.3.1 + diff --git a/SOURCES/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch b/SOURCES/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch new file mode 100644 index 0000000..042e1c0 --- /dev/null +++ b/SOURCES/0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch @@ -0,0 +1,52 @@ +From 37bf4daca164cfcb260760ee2fd25d66f920dc7f Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 22 Feb 2017 18:26:30 +0530 +Subject: [PATCH 064/124] glusterd/ganesha : return proper value in pre_setup() + +Label: DOWNSTREAM ONLY + +Change-Id: I6f7ce82488904c7d418ee078162f26f1ec81e9d9 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/16733 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-by: Raghavendra Talur <rtalur@redhat.com> +Tested-by: Raghavendra Talur 
<rtalur@redhat.com> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167149 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 1c2ba7a..d9fdfc6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -843,16 +843,15 @@ pre_setup(gf_boolean_t run_setup, char **op_errstr) + { + int ret = 0; + +- ret = check_host_list(); +- +- if (ret) { ++ if (check_host_list()) { + ret = setup_cluster(run_setup); + if (ret == -1) + gf_asprintf(op_errstr, + "Failed to set up HA " + "config for NFS-Ganesha. " + "Please check the log file for details"); +- } ++ } else ++ ret = -1; + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0065-ganesha-scripts-remove-dependency-over-export-config.patch b/SOURCES/0065-ganesha-scripts-remove-dependency-over-export-config.patch new file mode 100644 index 0000000..4c99ef4 --- /dev/null +++ b/SOURCES/0065-ganesha-scripts-remove-dependency-over-export-config.patch @@ -0,0 +1,58 @@ +From 7a47c004b907ed5469b78d559cae6d151e4d626b Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Thu, 23 Feb 2017 16:21:52 +0530 +Subject: [PATCH 065/124] ganesha/scripts : remove dependency over export + configuration file for unexport + +Currently unexport is performed by reading export id from volume configuration +file. So unexport has dependency over that file. This patch will unexport with +help of dbus command ShowExport. And it will only unexport the share which is +added via cli. 
+ +Label: DOWNSTREAM ONLY + +Change-Id: I6f3c9b2bb48f0328b18e9cc0e4b9356174afd596 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/16771 +Smoke: Gluster Build System <jenkins@build.gluster.org> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Reviewed-by: Raghavendra Talur <rtalur@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167150 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/dbus-send.sh | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh +index ec8d948..9d613a0 100755 +--- a/extras/ganesha/scripts/dbus-send.sh ++++ b/extras/ganesha/scripts/dbus-send.sh +@@ -41,8 +41,18 @@ string:"EXPORT(Path=/$VOL)" + #This function removes an export dynamically(uses the export_id of the export) + function dynamic_export_remove() + { +- removed_id=`cat $GANESHA_DIR/exports/export.$VOL.conf |\ +-grep Export_Id | awk -F"[=,;]" '{print$2}'| tr -d '[[:space:]]'` ++ # The bash below fetches all the exports from the ShowExports command and ++ # searches for the export entry based on path, then picks out its export id. ++ # There are two possibilities for the path: either the entire volume is ++ # exported or a subdir. It handles both cases, but it removes only the first ++ # entry from the list, based on the assumption that the entry exported via ++ # the cli has the lowest export id value ++ removed_id=$(dbus-send --type=method_call --print-reply --system \ ++ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ ++ org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \ ++ "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \ ++ | head -1) ++ + dbus-send --print-reply --system \ + --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ + org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id +-- +1.8.3.1 + diff --git a/SOURCES/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch b/SOURCES/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch new file mode 100644 index 0000000..187b97c --- /dev/null +++ b/SOURCES/0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch @@ -0,0 +1,41 @@ +From d91eadbbb3e2d02e7297214da394b0e232544386 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Tue, 2 May 2017 14:06:00 +0530 +Subject: [PATCH 066/124] glusterd/ganesha : add proper NULL check in + manage_export_config + +Label: DOWNSTREAM ONLY + +Change-Id: I872b2b6b027f04e61f60ad85588f50e1ef2f988c +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/17150 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167151 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- +
xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index d9fdfc6..fe0bffc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -435,7 +435,7 @@ manage_export_config(char *volname, char *value, char **op_errstr) + CONFDIR, value, volname, NULL); + ret = runner_run(&runner); + +- if (ret && !(*op_errstr)) ++ if (ret && op_errstr) + gf_asprintf(op_errstr, + "Failed to create" + " NFS-Ganesha export config file."); +-- +1.8.3.1 + diff --git a/SOURCES/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch b/SOURCES/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch new file mode 100644 index 0000000..233725e --- /dev/null +++ b/SOURCES/0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch @@ -0,0 +1,41 @@ +From 1e5c6bb28894a57e5ca5ed7b4b3b5e05efecf7cd Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 3 May 2017 12:47:14 +0530 +Subject: [PATCH 067/124] ganesha : minor improvments for commit e91cdf4 + (17081) + +Label: DOWNSTREAM ONLY + +Change-Id: I3af13e081c5e46cc6f2c132e7a5106ac3355c850 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://review.gluster.org/17152 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: soumya k <skoduri@redhat.com> +NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167152 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index f040ef6..cedc3fa 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -275,7 +275,7 @@ string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then +- echo "Refresh-config failed on ${current_host}" ++ echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." + fi +-- +1.8.3.1 + diff --git a/SOURCES/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch b/SOURCES/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch new file mode 100644 index 0000000..3658ec1 --- /dev/null +++ b/SOURCES/0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch @@ -0,0 +1,58 @@ +From aabc623f99d22a2a9e1d52f3ca7de1dc5b49946d Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Tue, 13 Jun 2017 07:36:50 -0400 +Subject: [PATCH 068/124] common-ha: surviving ganesha.nfsd not put in grace on + fail-over + +Behavior change is seen in new HA in RHEL 7.4 Beta. Up to now clone +RAs have been created with "pcs resource create ... meta notify=true". +Their notify method is invoked with pre-start or post-stop when one of +the clone RAs is started or stopped. 
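+
+In pcs terms the two spellings differ only in where the meta-attribute
+lands (a sketch for contrast; both command lines are taken from the hunk
+further below):
+
+    # attaches notify=true to the primitive (the old invocation)
+    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true
+
+    # attaches notify=true to the clone itself (the fix)
+    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true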
+
+In 7.4 Beta we observe that the notify method is not
+invoked when one of the clones is stopped (or started).
+
+Ken Gaillot, one of the pacemaker devs, wrote:
+  With the above command, pcs puts the notify=true meta-attribute
+  on the primitive instead of the clone. Looking at the pcs help,
+  that seems expected (--clone notify=true would put it on the clone,
+  meta notify=true puts it on the primitive). If you drop the "meta"
+  above, I think it will work again.
+
+And indeed his suggested fix does work on both RHEL 7.4 Beta and RHEL
+7.3 and presumably Fedora.
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Idbb539f1366df6d39f77431c357dff4e53a2df6d
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17534
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167153
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index cedc3fa..537c965 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -445,7 +445,7 @@ setup_create_resources()
+     # ganesha-active crm_attribute
+     sleep 5
+
+-    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true
++    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true
+     if [ $? -ne 0 ]; then
+         logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
+     fi
+--
+1.8.3.1
+
diff --git a/SOURCES/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch b/SOURCES/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
new file mode 100644
index 0000000..16aea73
--- /dev/null
+++ b/SOURCES/0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch
@@ -0,0 +1,96 @@
+From 916a79ea78db264ceedd4ebdba794e488b82eceb Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 21 Jun 2017 10:01:20 -0400
+Subject: [PATCH 069/124] common-ha: enable and disable selinux
+ ganesha_use_fusefs
+
+Starting in Fedora 26 and RHEL 7.4 there are new targeted policies
+in selinux which include a tuneable to allow ganesha.nfsd to access
+the gluster (FUSE) shared_storage volume where ganesha maintains its
+state.
+
+N.B. rpm doesn't have a way to distinguish between RHEL 7.3 or 7.4
+so it can't be enabled for RHEL at this time. /usr/sbin/semanage is
+in policycoreutils-python in RHEL (versus policycoreutils-python-utils
+in Fedora.) Once RHEL 7.4 GAs we may also wish to specify the version
+for RHEL 7 explicitly, i.e.
+  Requires: selinux-policy >= 3.13.1-160.
+But beware, the corresponding version in Fedora 26 seems to be
+selinux-policy-3.13.1.258 or so. (Maybe earlier versions, but that's
+what's currently in the F26 beta.)
+
+release-3.10 is the upstream master branch for glusterfs-ganesha.
For +release-3.11 and later storhaug needs a similar change, which is +tracked by https://github.com/linux-ha-storage/storhaug/issues/11 + +Maybe at some point we would want to consider migrating the targeted +policies for glusterfs (and nfs-ganesha) from selinux-policy to a +glusterfs-selinux (and nfs-ganesha-selinux) subpackage? + +Label: DOWNSTREAM ONLY + +Change-Id: I04a5443edd00636cbded59a2baddfa98095bf7ac +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://review.gluster.org/17597 +Smoke: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Niels de Vos <ndevos@redhat.com> +Reviewed-by: jiffin tony Thottan <jthottan@redhat.com> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167154 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index d748ebc..b01c94f 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -466,6 +466,11 @@ Requires: pcs, dbus + Requires: cman, pacemaker, corosync + %endif + ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++Requires(post): policycoreutils-python-utils ++Requires(postun): policycoreutils-python-utils ++%endif ++ + %description ganesha + GlusterFS is a distributed file-system capable of scaling to several + petabytes. It aggregates various storage bricks over Infiniband RDMA +@@ -923,6 +928,14 @@ exit 0 + %systemd_post glustereventsd + %endif + ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%post ganesha ++semanage boolean -m ganesha_use_fusefs --on ++exit 0 ++%endif ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + %post geo-replication + if [ $1 -ge 1 ]; then +@@ -1055,6 +1068,14 @@ fi + exit 0 + %endif + ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%postun ganesha ++semanage boolean -m ganesha_use_fusefs --off ++exit 0 ++%endif ++%endif ++ + ##----------------------------------------------------------------------------- + ## All %%files should be placed here and keep them grouped + ## +-- +1.8.3.1 + diff --git a/SOURCES/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch b/SOURCES/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch new file mode 100644 index 0000000..6715f1f --- /dev/null +++ b/SOURCES/0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch @@ -0,0 +1,76 @@ +From f410cd9f9b9455373a9612423558d8d0f83cd0fc Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Wed, 12 Jul 2017 07:43:51 -0400 +Subject: [PATCH 070/124] packaging: glusterfs-ganesha update sometimes fails + semanage + +Depending on how dnf orders updates, the updated version of +selinux-policy-targeted with ganesha_use_fusefs may not be updated +before the glusterfs-ganesha update execute its %post scriptlet +containing the `semanage ganesha_use_fusefs ...` command. In such +situations the semanage command (silently) fails. + +Use a %trigger (and %triggerun) to run the scriptlet (again) after +selinux-policy-targeted with ganesha_use_fusefs has been installed +or updated. + +Note: the %triggerun is probably unnecessary, but it doesn't hurt. 
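+
+One way to sanity-check the end state after an update (an illustrative
+command, not part of the package) is:
+
+    getsebool ganesha_use_fusefs    # expected: ganesha_use_fusefs --> on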
+ +The release-3.10 branch is the "upstream master" for the glusterfs- +ganesha subpackage. + +Note: to be merged after https://review.gluster.org/17806 + +Label: DOWNSTREAM ONLY + +Change-Id: I1ad06d79fa1711e4abf038baf9f0a5b7bb665934 +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://review.gluster.org/17756 +Smoke: Gluster Build System <jenkins@build.gluster.org> +CentOS-regression: Gluster Build System <jenkins@build.gluster.org> +Reviewed-by: Niels de Vos <ndevos@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167155 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index b01c94f..1d99a3d 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1077,6 +1077,28 @@ exit 0 + %endif + + ##----------------------------------------------------------------------------- ++## All %%trigger should be placed here and keep them sorted ++## ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%trigger ganesha -- selinux-policy-targeted ++semanage boolean -m ganesha_use_fusefs --on ++exit 0 ++%endif ++%endif ++ ++##----------------------------------------------------------------------------- ++## All %%triggerun should be placed here and keep them sorted ++## ++%if ( 0%{!?_without_server:1} ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%triggerun ganesha -- selinux-policy-targeted ++semanage boolean -m ganesha_use_fusefs --off ++exit 0 ++%endif ++%endif ++ ++##----------------------------------------------------------------------------- + ## All %%files should be placed here and keep them grouped + ## + %files +-- +1.8.3.1 + diff --git a/SOURCES/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch b/SOURCES/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch new file mode 100644 index 0000000..ad14a89 --- /dev/null +++ b/SOURCES/0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch @@ -0,0 +1,66 @@ +From 662c94f3b3173bf78465644e2e42e03efd9ea493 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Mon, 17 Jul 2017 11:07:40 -0400 +Subject: [PATCH 071/124] common-ha: enable and disable selinux + gluster_use_execmem + +Starting in Fedora 26 and RHEL 7.4 there are new targeted policies in +selinux which include a tuneable to allow glusterd->ganesha-ha.sh->pcs +to access the pcs config, i.e. gluster-use-execmem. + +Note. rpm doesn't have a way to distinguish between RHEL 7.3 or 7.4 +or between 3.13.1-X and 3.13.1-Y so it can't be enabled for RHEL at +this time. + +/usr/sbin/semanage is in policycoreutils-python in RHEL (versus +policycoreutils-python-utils in Fedora.) + +Requires selinux-policy >= 3.13.1-160 in RHEL7. The corresponding +version in Fedora 26 seems to be selinux-policy-3.13.1-259 or so. (Maybe +earlier versions, but that's what was in F26 when I checked.) + +Label: DOWNSTREAM ONLY + +Change-Id: Ic474b3f7739ff5be1e99d94d00b55caae4ceb5a0 +Signed-off-by: Kaleb S. 
KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://review.gluster.org/17806
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: soumya k <skoduri@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167156
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 537c965..f4400af 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -984,6 +984,9 @@ main()
+         usage
+         exit 0
+     fi
++
++    semanage boolean -m gluster_use_execmem --on
++
+     HA_CONFDIR=${1%/}; shift
+     local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+     local node=""
+@@ -1129,6 +1132,9 @@ $HA_CONFDIR/ganesha-ha.conf
+         ;;
+
+     esac
++
++    semanage boolean -m gluster_use_execmem --off
++
+ }
+
+ main $*
+--
+1.8.3.1
+
diff --git a/SOURCES/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch b/SOURCES/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch
new file mode 100644
index 0000000..4bf730b
--- /dev/null
+++ b/SOURCES/0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch
@@ -0,0 +1,60 @@
+From c147bbec10fc72b85301ab6a7580f15713b8a974 Mon Sep 17 00:00:00 2001
+From: Ambarish <asoman@redhat.com>
+Date: Tue, 12 Sep 2017 18:34:29 +0530
+Subject: [PATCH 072/124] ganesha-ha: don't set SELinux booleans if SELinux is
+ disabled
+
+semanage commands inside the ganesha-ha.sh script will fail if selinux is
+disabled. This patch introduces a check for whether selinux is enabled, and
+subsequently runs semanage commands only on selinux-enabled systems.
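+
+selinuxenabled(8) exits 0 when SELinux is enforcing or permissive and
+non-zero when it is disabled, so the guard is the usual shell idiom
+(a sketch mirroring the hunk below):
+
+    if (selinuxenabled) ; then
+        semanage boolean -m gluster_use_execmem --on
+    fi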
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: Ibee61cbb1d51a73e6c326b49bac5c7ce06feb310
+Signed-off-by: Ambarish <asoman@redhat.com>
+Reviewed-on: https://review.gluster.org/18264
+Reviewed-by: Niels de Vos <ndevos@redhat.com>
+Smoke: Gluster Build System <jenkins@build.gluster.org>
+Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
+Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
+Reviewed-by: Daniel Gryniewicz <dang@redhat.com>
+CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167157
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index f4400af..e1d3ea0 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -985,7 +985,9 @@ main()
+         exit 0
+     fi
+
+-    semanage boolean -m gluster_use_execmem --on
++    if (selinuxenabled) ;then
++        semanage boolean -m gluster_use_execmem --on
++    fi
+
+     HA_CONFDIR=${1%/}; shift
+     local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+@@ -1133,8 +1135,9 @@ $HA_CONFDIR/ganesha-ha.conf
+
+     esac
+
+-    semanage boolean -m gluster_use_execmem --off
+-
++    if (selinuxenabled) ;then
++        semanage boolean -m gluster_use_execmem --off
++    fi
+ }
+
+ main $*
+--
+1.8.3.1
+
diff --git a/SOURCES/0073-build-remove-ganesha-dependency-on-selinux-policy.patch b/SOURCES/0073-build-remove-ganesha-dependency-on-selinux-policy.patch
new file mode 100644
index 0000000..4d16042
--- /dev/null
+++ b/SOURCES/0073-build-remove-ganesha-dependency-on-selinux-policy.patch
@@ -0,0 +1,45 @@
+From 52279c877264f41b522f747a986b937e6f054e2a Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Fri, 23 Jun 2017 20:43:16 +0530
+Subject: [PATCH 073/124] build: remove ganesha dependency on selinux-policy
+
+Problem:
+Puddle creation fails with unresolved dependencies
+    unresolved deps:
+***     selinux-policy >= 0:3.13.1-160
+
+Solution:
+We know a priori that the version in RHEL 7.4 is already the desired
+version. So removing this explicit dependency *should* not be a gluster
+test blocker.
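+
+When in doubt, the policy version actually installed can be checked by
+hand (illustrative only):
+
+    rpm -q --qf '%{VERSION}-%{RELEASE}\n' selinux-policy
+    # >= 3.13.1-160 is the version that carries the ganesha boolean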
+ +Label: DOWNSTREAM ONLY + +Change-Id: Id53ac0e41adc14704932787ee0dd3143e6615aaf +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/109945 +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167158 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1d99a3d..e55e255 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -467,6 +467,7 @@ Requires: cman, pacemaker, corosync + %endif + + %if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++Requires: selinux-policy >= 3.13.1-160 + Requires(post): policycoreutils-python-utils + Requires(postun): policycoreutils-python-utils + %endif +-- +1.8.3.1 + diff --git a/SOURCES/0074-common-ha-enable-pacemaker-at-end-of-setup.patch b/SOURCES/0074-common-ha-enable-pacemaker-at-end-of-setup.patch new file mode 100644 index 0000000..6366f0c --- /dev/null +++ b/SOURCES/0074-common-ha-enable-pacemaker-at-end-of-setup.patch @@ -0,0 +1,67 @@ +From bfbda24746bf11573b485baf534a5cf1373c6c89 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Wed, 7 Jun 2017 08:15:48 -0400 +Subject: [PATCH 074/124] common-ha: enable pacemaker at end of setup + +Label: DOWNSTREAM ONLY + +Change-Id: I3ccd59b67ed364bfc5d27e88321ab5b9f8d471fd +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/108431 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167159 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index e1d3ea0..d7dfb87 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -787,6 +787,22 @@ setup_state_volume() + } + + ++enable_pacemaker() ++{ ++ while [[ ${1} ]]; do ++ if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then ++${SECRET_PEM} root@${1} ${SERVICE_MAN} enable pacemaker" ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" ++ else ++ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ ++${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable" ++ fi ++ shift ++ done ++} ++ ++ + addnode_state_volume() + { + local newnode=${1}; shift +@@ -1011,6 +1027,8 @@ main() + + if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then + ++ determine_service_manager ++ + setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" + + setup_create_resources ${HA_SERVERS} +@@ -1019,6 +1037,8 @@ main() + + setup_state_volume ${HA_SERVERS} + ++ enable_pacemaker ${HA_SERVERS} ++ + else + + logger "insufficient servers for HA, aborting" +-- +1.8.3.1 + diff --git a/SOURCES/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch b/SOURCES/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch new file mode 100644 index 0000000..b524cfe --- /dev/null +++ 
b/SOURCES/0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch @@ -0,0 +1,43 @@ +From 0a124b59c662c8f85fe6d184b839cbfe29d5e8ab Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Wed, 14 Jun 2017 15:20:22 +0530 +Subject: [PATCH 075/124] common-ha: Fix an incorrect syntax during setup + +There was an invalid line introduced as part of +https://code.engineering.redhat.com/gerrit/#/c/108431/ + +Detected by rpmdiff - + https://errata.devel.redhat.com/rpmdiff/show/175336?result_id=4796901 + +This change is to fix the same. + +Label: DOWNSTREAM ONLY + +Change-Id: I55cdd7d866cb175fb620dbbd2d02c36eab291a74 +Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/109017 +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +Tested-by: Kaleb Keithley <kkeithle@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167160 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index d7dfb87..8302c5e 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -791,7 +791,6 @@ enable_pacemaker() + { + while [[ ${1} ]]; do + if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then +-${SECRET_PEM} root@${1} ${SERVICE_MAN} enable pacemaker" + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else +-- +1.8.3.1 + diff --git a/SOURCES/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch b/SOURCES/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch new file mode 100644 index 0000000..4147a46 --- /dev/null +++ b/SOURCES/0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch @@ -0,0 +1,44 @@ +From a917a989232d2c72752f8a2cf27bad90b5acb83d Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Tue, 27 Feb 2018 15:35:30 +0530 +Subject: [PATCH 076/124] glusterd/ganesha : change voltype for ganesha.enable + in volume option table + +The voltype defined for ganesha.enable is features/ganesha. But ganesha xlator +was removed from client stack long back. Now it is defined as part of glusterd. +So reflecting the same on the volume option table. 
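+
+The user-facing knob is unchanged; the option is still toggled through the
+normal volume-set path, e.g. (illustrative):
+
+    gluster volume set <volname> ganesha.enable on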
+
+Label: DOWNSTREAM ONLY
+
+Upstream reference :
+>patch link https://review.gluster.org/19639
+>Change-Id: Ifedd7493020b77bd54edfdbdd9c799d93b24d0aa
+>BUG: 1486542
+>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+
+Change-Id: Ifedd7493020b77bd54edfdbdd9c799d93b24d0aa
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167161
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+index 13f423a..c8f6e67 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+@@ -2599,7 +2599,7 @@ struct volopt_map_entry glusterd_volopt_map[] = {
+     },
+     {
+         .key = "ganesha.enable",
+-        .voltype = "features/ganesha",
++        .voltype = "mgmt/ganesha",
+         .value = "off",
+         .option = "ganesha.enable",
+         .op_version = GD_OP_VERSION_3_7_0,
+--
+1.8.3.1
+
diff --git a/SOURCES/0077-glusterd-ganesha-create-remove-export-file-only-from.patch b/SOURCES/0077-glusterd-ganesha-create-remove-export-file-only-from.patch
new file mode 100644
index 0000000..139a8dd
--- /dev/null
+++ b/SOURCES/0077-glusterd-ganesha-create-remove-export-file-only-from.patch
@@ -0,0 +1,73 @@
+From 1e619b95e3f03e226fef135bfaeeca9b069eb978 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Wed, 14 Mar 2018 12:01:30 +0530
+Subject: [PATCH 077/124] glusterd/ganesha : create/remove export file only
+ from the node which performs ganesha.enable
+
+As part of volume set ganesha.enable on, the ganesha export configuration
+file will be created/removed using "create-export-ganesha.sh". This is
+performed on the nodes which are part of the ganesha cluster. But it is not
+needed, since the file is saved in shared storage and consumed by the nodes
+in the ganesha cluster.
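+
+Since the export file lives on shared storage, every node in the cluster
+sees the same copy anyway, e.g. (path per the scripts above, with
+<volname> as a placeholder):
+
+    ls $GANESHA_DIR/exports/export.<volname>.conf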
+ +Label: DOWNSTREAM ONLY + +Upstream Reference : +>patch link : https://review.gluster.org/#/c/19716/ +>Change-Id: I2583899972b47d451a90711940293004a3af4690 +>BUG: 1555195 +>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> + +Change-Id: I2583899972b47d451a90711940293004a3af4690 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167162 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 3 ++- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index fe0bffc..ff36476 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -530,7 +530,7 @@ ganesha_manage_export(dict_t *dict, char *value, + * Create the export file from the node where ganesha.enable "on" + * is executed + * */ +- if (option) { ++ if (option && is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "on", op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL, +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 52809a8..10e2d48 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2415,7 +2415,8 @@ glusterd_op_reset_volume(dict_t *dict, char **op_rspstr) + } + + if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) { +- if (glusterd_check_ganesha_export(volinfo)) { ++ if (glusterd_check_ganesha_export(volinfo) && ++ is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "off", op_rspstr); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL, +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index de4eccb..1ea8ba6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2936,7 +2936,7 @@ glusterd_op_delete_volume(dict_t *dict) + goto out; + } + +- if (glusterd_check_ganesha_export(volinfo)) { ++ if (glusterd_check_ganesha_export(volinfo) && is_origin_glusterd(dict)) { + ret = manage_export_config(volname, "off", NULL); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, 0, +-- +1.8.3.1 + diff --git a/SOURCES/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch b/SOURCES/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch new file mode 100644 index 0000000..fe29fc7 --- /dev/null +++ b/SOURCES/0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch @@ -0,0 +1,40 @@ +From 5daff948884b1b68ffcbc6ceea3c7affdb9700f4 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 4 Apr 2018 09:29:43 +0530 +Subject: [PATCH 078/124] common-ha/scripts : pass the list of servers properly + to stop_ganesha_all() + +Label: DOWNSTREAM ONLY + +Upstream Reference : +>Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08 +>BUG: 1563500 +>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +>Patch link : https://review.gluster.org/#/c/19816/ + +Change-Id: I6d92623cd9fb450d7a27f5acc61eca0b3cbc9b08 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> 
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167163 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 8302c5e..4e5799f 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -199,7 +199,7 @@ setup_cluster() + if [ $? -ne 0 ]; then + logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" + #set up failed stop all ganesha process and clean up symlinks in cluster +- stop_ganesha_all ${servers} ++ stop_ganesha_all "${servers}" + exit 1; + fi + pcs cluster start --all +-- +1.8.3.1 + diff --git a/SOURCES/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch b/SOURCES/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch new file mode 100644 index 0000000..982a531 --- /dev/null +++ b/SOURCES/0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch @@ -0,0 +1,93 @@ +From 7e71723a46237f13a570961054b361dc1b34ab25 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Thu, 19 Jan 2017 15:01:12 +0530 +Subject: [PATCH 079/124] common-ha: All statd related files need to be owned + by rpcuser + +Statd service is started as rpcuser by default. Hence the +files/directories needed by it under '/var/lib/nfs' should be +owned by the same user. + +Note: This change is not in mainline as the cluster-bits +are being moved to storehaug project - +http://review.gluster.org/#/c/16349/ +http://review.gluster.org/#/c/16333/ + +Label: DOWNSTREAM ONLY + +Upstream Reference : +> Change-Id: I89fd06aa9700c5ce60026ac825da7c154d9f48fd +> BUG: 1414665 +> Signed-off-by: Soumya Koduri <skoduri@redhat.com> +> Reviewed-on: http://review.gluster.org/16433 +> Reviewed-by: jiffin tony Thottan <jthottan@redhat.com> +> Smoke: Gluster Build System <jenkins@build.gluster.org> +> Tested-by: Kaleb KEITHLEY <kkeithle@redhat.com> +> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> +> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> + +Change-Id: I89fd06aa9700c5ce60026ac825da7c154d9f48fd +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167164 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4e5799f..4a98f32 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -756,9 +756,11 @@ setup_state_volume() + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov +@@ -768,9 +770,11 @@ setup_state_volume() + fi + if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state +@@ -830,9 +834,11 @@ addnode_state_volume() + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov +@@ -842,9 +848,11 @@ addnode_state_volume() + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ++ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state +-- +1.8.3.1 + diff --git a/SOURCES/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch b/SOURCES/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch new file mode 100644 index 0000000..acd1d4a --- /dev/null +++ b/SOURCES/0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch @@ -0,0 +1,62 @@ +From c5c6720c5186741a3b01a5ba2b34633fc1a00fc5 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 30 Apr 2018 12:35:01 +0530 +Subject: [PATCH 080/124] glusterd/ganesha : Skip non-ganesha nodes properly + for ganesha HA set up + +Label: DOWNSTREAM ONLY + +Upstream reference: +>Patch unlink https://review.gluster.org/#/c/19949/ +>Change-Id: Iff7bc3ead43e97847219c5a5cc8b967bf0967903 +>BUG: 1573078 +>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> + +Change-Id: Iff7bc3ead43e97847219c5a5cc8b967bf0967903 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167165 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index ff36476..d882105 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -842,17 +842,20 @@ static int + pre_setup(gf_boolean_t run_setup, char **op_errstr) + { + int ret = 0; +- +- if (check_host_list()) { +- ret = setup_cluster(run_setup); +- if (ret == -1) ++ if (run_setup) { ++ if (!check_host_list()) { + gf_asprintf(op_errstr, +- "Failed to set up HA " +- "config for NFS-Ganesha. 
" +- "Please check the log file for details"); +- } else +- ret = -1; +- ++ "Running nfs-ganesha setup command " ++ "from node which is not part of ganesha cluster"); ++ return -1; ++ } ++ } ++ ret = setup_cluster(run_setup); ++ if (ret == -1) ++ gf_asprintf(op_errstr, ++ "Failed to set up HA " ++ "config for NFS-Ganesha. " ++ "Please check the log file for details"); + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch b/SOURCES/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch new file mode 100644 index 0000000..0a4110f --- /dev/null +++ b/SOURCES/0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch @@ -0,0 +1,50 @@ +From 3cb9ed7e20f59eec036908eed4cfdbc61e990ee2 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Tue, 11 Dec 2018 10:09:42 -0500 +Subject: [PATCH 081/124] ganesha-ha: ensure pacemaker is enabled after setup + +There appears to be a race between `pcs cluster setup ...` early +in the setup and the `systemctl enable pacemaker` at the end. The +`pcs cluster setup ...` disables pacemaker and corosync. (Now, in +pacemaker-1.1.18. Was it always the case?) + +I am not able to reproduce this on my devel system. I speculate that +on a busy system that the `pcs cluster setup ...` disable may, under +the right conditions, not run until after the setup script enables +it. It must require the right alignment of the Sun, Moon, and all +the planets. + +Regardless, we'll use the --enable option to `pcs cluster setup ...` +to ensure that the cluster (re)starts pacemaker. + +Label: DOWNSTREAM ONLY + +Change-Id: I771ff62c37426438b80e61651a8b4ecaf2d549c3 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167166 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4a98f32..32af1ca 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -195,9 +195,9 @@ setup_cluster() + + pcs cluster auth ${servers} + # pcs cluster setup --name ${name} ${servers} +- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} ++ pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} + if [ $? 
-ne 0 ]; then +- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" ++ logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" + exit 1; +-- +1.8.3.1 + diff --git a/SOURCES/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch b/SOURCES/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch new file mode 100644 index 0000000..6df51eb --- /dev/null +++ b/SOURCES/0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch @@ -0,0 +1,59 @@ +From 6d6841a996a52488e8a18606f386bba0a12b4231 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Fri, 18 Nov 2016 12:47:06 +0530 +Subject: [PATCH 082/124] build: Add dependency on netstat for + glusterfs-ganesha pkg + +portblock resource-agent needs netstat command but this dependency +should have been ideally added to resource-agents package. But the +fixes (bug1395594, bug1395596) are going to be available only +in the future RHEL 6.9 and RHEL 7.4 releases. Hence as an interim +workaround, we agreed to add this dependency for glusterfs-ganesha package. + +label : DOWNSTREAM ONLY + +Change-Id: I6ac1003103755d7534dd079c821bbaacd8dd94b8 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167167 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index e55e255..bc27058 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -466,6 +466,12 @@ Requires: pcs, dbus + Requires: cman, pacemaker, corosync + %endif + ++%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 ) ++# we need portblock resource-agent in 3.9.5 and later. 
++Requires: resource-agents >= 3.9.5 ++Requires: net-tools ++%endif ++ + %if ( 0%{?fedora} && 0%{?fedora} > 25 ) + Requires: selinux-policy >= 3.13.1-160 + Requires(post): policycoreutils-python-utils +@@ -1951,6 +1957,14 @@ fi + %endif + + %changelog ++* Sun Apr 7 2019 Soumya Koduri <skoduri@redhat.com> ++- As an interim fix add dependency on netstat(/net-tools) for glusterfs-ganesha package (#1395574) ++ ++* Sun Apr 7 2019 Soumya Koduri <skoduri@redhat.com> ++- Add dependency on portblock resource agent for ganesha package (#1278336) ++- Fix incorrect Requires for portblock resource agent (#1278336) ++- Update version checks for portblock resource agent on RHEL (#1278336) ++ + * Sat Apr 6 2019 Jiffin Tony Thottan <jthottan@redhat.com> + - Adding ganesha ha resources back in gluster repository + +-- +1.8.3.1 + diff --git a/SOURCES/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch b/SOURCES/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch new file mode 100644 index 0000000..2d12285 --- /dev/null +++ b/SOURCES/0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch @@ -0,0 +1,82 @@ +From a80743a3053798521ae4dd830adcde8bc7da11b6 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Tue, 20 Feb 2018 11:50:33 +0530 +Subject: [PATCH 083/124] common-ha: enable and disable selinux + ganesha_use_fusefs + +Adding missing changes in a downstream backport(https://code.engineering.redhat.com/gerrit/#/c/109845/) + +Label: DOWNSTREAM ONLY + +Change-Id: I59fd2fc2228ded9547c2d1e08c22f7a10c35f86f +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167244 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index bc27058..2149f86 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -472,11 +472,16 @@ Requires: resource-agents >= 3.9.5 + Requires: net-tools + %endif + +-%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) ++%if ( 0%{?rhel} ) + Requires: selinux-policy >= 3.13.1-160 ++Requires(post): policycoreutils-python ++Requires(postun): policycoreutils-python ++%else + Requires(post): policycoreutils-python-utils + Requires(postun): policycoreutils-python-utils + %endif ++%endif + + %description ganesha + GlusterFS is a distributed file-system capable of scaling to several +@@ -936,7 +941,7 @@ exit 0 + %endif + + %if ( 0%{!?_without_server:1} ) +-%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) + %post ganesha + semanage boolean -m ganesha_use_fusefs --on + exit 0 +@@ -1076,7 +1081,7 @@ exit 0 + %endif + + %if ( 0%{!?_without_server:1} ) +-%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) + %postun ganesha + semanage boolean -m ganesha_use_fusefs --off + exit 0 +@@ -1087,7 +1092,7 @@ exit 0 + ## All %%trigger should be placed here and keep them sorted + ## + %if ( 0%{!?_without_server:1} ) +-%if ( 0%{?fedora} && 0%{?fedora} > 25 ) ++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) + %trigger ganesha -- selinux-policy-targeted + semanage 
boolean -m ganesha_use_fusefs --on
+ exit 0
+@@ -1098,7 +1103,7 @@ exit 0
+ ## All %%triggerun should be placed here and keep them sorted
+ ##
+ %if ( 0%{!?_without_server:1} )
+-%if ( 0%{?fedora} && 0%{?fedora} > 25 )
++%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+ %triggerun ganesha -- selinux-policy-targeted
+ semanage boolean -m ganesha_use_fusefs --off
+ exit 0
+--
+1.8.3.1
+
diff --git a/SOURCES/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch b/SOURCES/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
new file mode 100644
index 0000000..04f8013
--- /dev/null
+++ b/SOURCES/0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch
@@ -0,0 +1,37 @@
+From d7bee4a4ad0878003e19711e20994c42c4d2bd9e Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 9 Apr 2019 16:15:09 +0530
+Subject: [PATCH 084/124] glusterd: Fix duplicate client_op_version in info
+ file
+
+This must have been introduced while applying downstream only patches at
+RHGS 3.5.0 branch.
+
+Change-Id: I231249cca2a7bce29ef53cf95f9d2377b8203283
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167341
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index fb52957..351bd9e 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1022,11 +1022,6 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+         goto out;
+     }
+
+-    snprintf(buf, sizeof(buf), "%d", volinfo->client_op_version);
+-    ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+-                              buf);
+-    if (ret)
+-        goto out;
+     if (volinfo->caps) {
+         snprintf(buf, sizeof(buf), "%d", volinfo->caps);
+         ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_VOL_CAPS, buf);
+--
+1.8.3.1
+
diff --git a/SOURCES/0085-Revert-all-remove-code-which-is-not-being-considered.patch b/SOURCES/0085-Revert-all-remove-code-which-is-not-being-considered.patch
new file mode 100644
index 0000000..6addaff
--- /dev/null
+++ b/SOURCES/0085-Revert-all-remove-code-which-is-not-being-considered.patch
@@ -0,0 +1,8976 @@
+From 379b9f7247a4daac9545e3dec79d3c2660111d8d Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Mon, 8 Apr 2019 11:32:09 +0530
+Subject: [PATCH 085/124] Revert "all: remove code which is not being
+ considered in build"
+
+This reverts most of commit 8293d21280fd6ddfc9bb54068cf87794fc6be207.
+It adds back the changes for tier and CTR, with the necessary changes for building it.
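+
+Tiering pulls in sqlite, so the revert also restores the configure knob
+seen in the configure.ac hunk below; a build without sqlite remains
+possible (illustrative):
+
+    ./autogen.sh
+    ./configure --disable-tiering    # skips libgfdb/CTR, no sqlite3 needed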
+ +Label: DOWNSTREAM ONLY + +Change-Id: I8f7978618f2a6a949b09dbcfd25722494cb8f1cd +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166245 +Reviewed-by: Nithya Balachandran <nbalacha@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + Makefile.am | 8 +- + configure.ac | 34 + + glusterfs.spec.in | 19 + + libglusterfs/Makefile.am | 4 +- + libglusterfs/src/glusterfs/mem-types.h | 1 + + xlators/cluster/dht/src/Makefile.am | 14 +- + xlators/cluster/dht/src/dht-rebalance.c | 12 + + xlators/cluster/dht/src/tier-common.c | 1199 ++++++++ + xlators/cluster/dht/src/tier-common.h | 55 + + xlators/cluster/dht/src/tier.c | 3105 ++++++++++++++++++++ + xlators/cluster/dht/src/tier.h | 110 + + xlators/features/Makefile.am | 2 +- + xlators/features/changetimerecorder/Makefile.am | 3 + + .../features/changetimerecorder/src/Makefile.am | 26 + + .../changetimerecorder/src/changetimerecorder.c | 2371 +++++++++++++++ + .../changetimerecorder/src/changetimerecorder.h | 21 + + .../features/changetimerecorder/src/ctr-helper.c | 293 ++ + .../features/changetimerecorder/src/ctr-helper.h | 854 ++++++ + .../features/changetimerecorder/src/ctr-messages.h | 61 + + .../changetimerecorder/src/ctr-xlator-ctx.c | 362 +++ + .../changetimerecorder/src/ctr-xlator-ctx.h | 68 + + .../changetimerecorder/src/ctr_mem_types.h | 22 + + 22 files changed, 8637 insertions(+), 7 deletions(-) + create mode 100644 xlators/cluster/dht/src/tier-common.c + create mode 100644 xlators/cluster/dht/src/tier-common.h + create mode 100644 xlators/cluster/dht/src/tier.c + create mode 100644 xlators/cluster/dht/src/tier.h + create mode 100644 xlators/features/changetimerecorder/Makefile.am + create mode 100644 xlators/features/changetimerecorder/src/Makefile.am + create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.c + create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.h + create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.c + create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.h + create mode 100644 xlators/features/changetimerecorder/src/ctr-messages.h + create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.c + create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.h + create mode 100644 xlators/features/changetimerecorder/src/ctr_mem_types.h + +diff --git a/Makefile.am b/Makefile.am +index e0c795f..613382f 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -3,7 +3,7 @@ SOURCES = site.h + EXTRA_DIST = autogen.sh \ + COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \ + INSTALL README.md AUTHORS THANKS NEWS \ +- glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \ ++ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in libgfdb.pc.in \ + run-tests.sh \ + build-aux/pkg-version \ + contrib/umountd \ +@@ -15,8 +15,12 @@ SUBDIRS = $(ARGP_STANDALONE_DIR) rpc/xdr/gen libglusterfs rpc api xlators \ + + pkgconfigdir = @pkgconfigdir@ + pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc ++if USE_GFDB ++pkgconfig_DATA += libgfdb.pc ++endif + +-CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile ++CLEANFILES = glusterfs-api.pc libgfchangelog.pc libgfdb.pc \ ++ contrib/umountd/Makefile + + gitclean: distclean + find . 
-name Makefile.in -exec rm -f {} \; +diff --git a/configure.ac b/configure.ac +index baa811a..633e850 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -30,6 +30,7 @@ AC_CONFIG_HEADERS([config.h site.h]) + AC_CONFIG_FILES([Makefile + libglusterfs/Makefile + libglusterfs/src/Makefile ++ libglusterfs/src/gfdb/Makefile + geo-replication/src/peer_gsec_create + geo-replication/src/peer_mountbroker + geo-replication/src/peer_mountbroker.py +@@ -121,6 +122,8 @@ AC_CONFIG_FILES([Makefile + xlators/features/changelog/src/Makefile + xlators/features/changelog/lib/Makefile + xlators/features/changelog/lib/src/Makefile ++ xlators/features/changetimerecorder/Makefile ++ xlators/features/changetimerecorder/src/Makefile + xlators/features/locks/Makefile + xlators/features/locks/src/Makefile + xlators/features/quota/Makefile +@@ -237,6 +240,7 @@ AC_CONFIG_FILES([Makefile + contrib/umountd/Makefile + glusterfs-api.pc + libgfchangelog.pc ++ libgfdb.pc + api/Makefile + api/src/Makefile + api/examples/Makefile +@@ -866,6 +870,33 @@ AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"]) + + #endof firewald section + ++# Data tiering requires sqlite ++AC_ARG_ENABLE([tiering], ++ AC_HELP_STRING([--disable-tiering], ++ [Disable data classification/tiering]), ++ [BUILD_GFDB="${enableval}"], [BUILD_GFDB="yes"]) ++ ++case $host_os in ++ darwin*) ++ SQLITE_LIBS="-lsqlite3" ++ AC_CHECK_HEADERS([sqlite3.h], AC_DEFINE(USE_GFDB, 1)) ++ ;; ++ *) ++ if test "x${BUILD_GFDB}" = "xyes"; then ++ PKG_CHECK_MODULES([SQLITE], [sqlite3], ++ AC_DEFINE(USE_GFDB, 1), ++ AC_MSG_ERROR([pass --disable-tiering to build without sqlite])) ++ else ++ AC_DEFINE(USE_GFDB, 0, [no sqlite, gfdb is disabled]) ++ fi ++ ;; ++esac ++ ++AC_SUBST(SQLITE_CFLAGS) ++AC_SUBST(SQLITE_LIBS) ++AM_CONDITIONAL(BUILD_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes") ++AM_CONDITIONAL(USE_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes") ++ + # xml-output + AC_ARG_ENABLE([xml-output], + AC_HELP_STRING([--disable-xml-output], +@@ -1544,6 +1575,8 @@ GFAPI_VERSION="7."${PACKAGE_VERSION} + LIBGFCHANGELOG_VERSION="0.0.1" + AC_SUBST(GFAPI_VERSION) + AC_SUBST(LIBGFCHANGELOG_VERSION) ++LIBGFDB_VERSION="0.0.1" ++AC_SUBST(LIBGFDB_VERSION) + + dnl libtool versioning + LIBGFXDR_LT_VERSION="0:1:0" +@@ -1584,6 +1617,7 @@ echo "XML output : $BUILD_XML_OUTPUT" + echo "Unit Tests : $BUILD_UNITTEST" + echo "Track priv ports : $TRACK_PRIVPORTS" + echo "POSIX ACLs : $BUILD_POSIX_ACLS" ++echo "Data Classification : $BUILD_GFDB" + echo "firewalld-config : $BUILD_FIREWALLD" + echo "Events : $BUILD_EVENTS" + echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 2149f86..e0607ba 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -154,6 +154,7 @@ + %global _without_events --disable-events + %global _without_georeplication --disable-georeplication + %global _with_gnfs %{nil} ++%global _without_tiering --disable-tiering + %global _without_ocf --without-ocf + %endif + +@@ -287,6 +288,9 @@ BuildRequires: libuuid-devel + %if ( 0%{?_with_cmocka:1} ) + BuildRequires: libcmocka-devel >= 1.0.1 + %endif ++%if ( 0%{!?_without_tiering:1} ) ++BuildRequires: sqlite-devel ++%endif + %if ( 0%{!?_without_georeplication:1} ) + BuildRequires: libattr-devel + %endif +@@ -797,6 +801,7 @@ export LDFLAGS + %{?_without_rdma} \ + %{?_without_server} \ + %{?_without_syslog} \ ++ %{?_without_tiering} \ + %{?_with_ipv6default} \ + %{?_without_libtirpc} + +@@ -1232,9 +1237,15 @@ exit 0 + %if ( 
0%{?_without_server:1} ) + %exclude %{_libdir}/pkgconfig/libgfchangelog.pc + %exclude %{_libdir}/libgfchangelog.so ++%if ( 0%{!?_without_tiering:1} ) ++%{_libdir}/pkgconfig/libgfdb.pc ++%endif + %else + %{_libdir}/pkgconfig/libgfchangelog.pc + %{_libdir}/libgfchangelog.so ++%if ( 0%{!?_without_tiering:1} ) ++%{_libdir}/pkgconfig/libgfdb.pc ++%endif + %endif + + %files client-xlators +@@ -1330,6 +1341,10 @@ exit 0 + %files libs + %{_libdir}/*.so.* + %exclude %{_libdir}/libgfapi.* ++%if ( 0%{!?_without_tiering:1} ) ++# libgfdb is only needed server-side ++%exclude %{_libdir}/libgfdb.* ++%endif + + %files -n python%{_pythonver}-gluster + # introducing glusterfs module in site packages. +@@ -1417,6 +1432,10 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so ++%if ( 0%{!?_without_tiering:1} ) ++ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so ++ %{_libdir}/libgfdb.so.* ++%endif + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix* +diff --git a/libglusterfs/Makefile.am b/libglusterfs/Makefile.am +index d471a3f..7e72f61 100644 +--- a/libglusterfs/Makefile.am ++++ b/libglusterfs/Makefile.am +@@ -1,3 +1,3 @@ +-SUBDIRS = src ++SUBDIRS = src src/gfdb + +-CLEANFILES = ++CLEANFILES = +diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h +index 832f68c..92730a9 100644 +--- a/libglusterfs/src/glusterfs/mem-types.h ++++ b/libglusterfs/src/glusterfs/mem-types.h +@@ -138,6 +138,7 @@ enum gf_common_mem_types_ { + gf_common_volfile_t, + gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ + gf_common_mt_server_cmdline_t, /* used only in one location */ ++ gf_mt_gfdb_query_record_t, + gf_common_mt_end + }; + #endif +diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am +index 56f1f2a..5532047 100644 +--- a/xlators/cluster/dht/src/Makefile.am ++++ b/xlators/cluster/dht/src/Makefile.am +@@ -1,4 +1,7 @@ + xlator_LTLIBRARIES = dht.la nufa.la switch.la ++if BUILD_GFDB ++ xlator_LTLIBRARIES += tier.la ++endif + + AM_CFLAGS = -Wall $(GF_CFLAGS) + +@@ -13,6 +16,7 @@ dht_la_SOURCES = $(dht_common_source) dht.c + + nufa_la_SOURCES = $(dht_common_source) nufa.c + switch_la_SOURCES = $(dht_common_source) switch.c ++tier_la_SOURCES = $(dht_common_source) tier.c tier-common.c + + dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +@@ -23,15 +27,21 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + ++tier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL) ++tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la ++ + noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \ +- dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h ++ dht-lock.h tier-common.h tier.h \ ++ $(top_builddir)/xlators/lib/src/libxlator.h + + AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ ++ -I$(top_srcdir)/libglusterfs/src/gfdb \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + 
-I$(top_srcdir)/xlators/lib/src \ + -DDATADIR=\"$(localstatedir)\" \ +- -DLIBDIR=\"$(libdir)\" ++ -DLIBDIR=\"$(libdir)\" \ ++ -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\" + + CLEANFILES = + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index e0f25b1..efbe8a4 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -8,6 +8,7 @@ + cases as published by the Free Software Foundation. + */ + ++#include "tier.h" + #include "dht-common.h" + #include <glusterfs/xlator.h> + #include <glusterfs/syscall.h> +@@ -2134,6 +2135,17 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + } + } + ++ /* store size of previous migrated file */ ++ if (defrag && defrag->tier_conf.is_tier) { ++ if (from != TIER_HASHED_SUBVOL) { ++ defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; ++ } else { ++ /* Don't delete the linkto file on the hashed subvol */ ++ delete_src_linkto = _gf_false; ++ defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; ++ } ++ } ++ + /* The src file is being unlinked after this so we don't need + to clean it up */ + clean_src = _gf_false; +diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c +new file mode 100644 +index 0000000..b22f477 +--- /dev/null ++++ b/xlators/cluster/dht/src/tier-common.c +@@ -0,0 +1,1199 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include <glusterfs/glusterfs.h> ++#include <glusterfs/xlator.h> ++#include "libxlator.h" ++#include "dht-common.h" ++#include <glusterfs/defaults.h> ++#include "tier-common.h" ++#include "tier.h" ++ ++int ++dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ loc_t *oldloc = NULL; ++ loc_t *newloc = NULL; ++ ++ local = frame->local; ++ ++ oldloc = &local->loc; ++ newloc = &local->loc2; ++ ++ if (op_ret == -1) { ++ /* No continuation on DHT inode missing errors, as we should ++ * then have a good stbuf that states P2 happened. 
We would
++ * get an inode-missing error if the file completed migrating between
++ * the lookup and the link call. (P2 here refers to the second, final
++ * phase of a DHT migration, i.e. the source has already been
++ * converted to a linkto file.) */
++ goto out;
++ }
++
++ if (local->call_cnt != 1) {
++ goto out;
++ }
++
++ local->call_cnt = 2;
++
++ /* Do this on the hot tier now */
++
++ STACK_WIND(frame, tier_link_cbk, local->cached_subvol,
++ local->cached_subvol->fops->link, oldloc, newloc, xdata);
++
++ return 0;
++
++out:
++ DHT_STRIP_PHASE1_FLAGS(stbuf);
++
++ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
++ postparent, NULL);
++
++ return 0;
++}
++
++int
++tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
++ dict_t *xdata)
++{
++ xlator_t *cached_subvol = NULL;
++ xlator_t *hashed_subvol = NULL;
++ int op_errno = -1;
++ int ret = -1;
++ dht_local_t *local = NULL;
++ dht_conf_t *conf = NULL;
++
++ VALIDATE_OR_GOTO(frame, err);
++ VALIDATE_OR_GOTO(this, err);
++ VALIDATE_OR_GOTO(oldloc, err);
++ VALIDATE_OR_GOTO(newloc, err);
++
++ conf = this->private;
++
++ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
++ if (!local) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++ local->call_cnt = 1;
++
++ cached_subvol = local->cached_subvol;
++
++ if (!cached_subvol) {
++ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
++ oldloc->path);
++ op_errno = ENOENT;
++ goto err;
++ }
++
++ hashed_subvol = TIER_HASHED_SUBVOL;
++
++ ret = loc_copy(&local->loc2, newloc);
++ if (ret == -1) {
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ if (hashed_subvol == cached_subvol) {
++ STACK_WIND(frame, dht_link_cbk, cached_subvol,
++ cached_subvol->fops->link, oldloc, newloc, xdata);
++ return 0;
++ }
++
++ /* Create hardlinks to both the data file on the hot tier
++ and the linkto file on the cold tier */
++
++ gf_uuid_copy(local->gfid, oldloc->inode->gfid);
++
++ STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link,
++ oldloc, newloc, xdata);
++
++ return 0;
++err:
++ op_errno = (op_errno == -1) ?
errno : op_errno; ++ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->params) { ++ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); ++ } ++ ++ DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ ++ return 0; ++} ++ ++int ++tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ xlator_t *prev = NULL; ++ int ret = -1; ++ dht_local_t *local = NULL; ++ xlator_t *hashed_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ ++ local = frame->local; ++ conf = this->private; ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ if (!local) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ if (op_ret == -1) { ++ if (local->linked == _gf_true && local->xattr_req) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( ++ local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value to " ++ "unlink of migrating file"); ++ goto out; ++ } ++ ++ STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk, ++ hashed_subvol, hashed_subvol->fops->unlink, &local->loc, ++ 0, local->xattr_req); ++ return 0; ++ } ++ goto out; ++ } ++ ++ prev = cookie; ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); ++ ++ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); ++ } ++ ++ ret = dht_layout_preset(this, prev, inode); ++ if (ret != 0) { ++ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s", ++ prev->name); ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ local->op_errno = op_errno; ++ ++ if (local->linked == _gf_true) { ++ local->stbuf = *stbuf; ++ dht_linkfile_attr_heal(frame, this); ++ } ++out: ++ if (local) { ++ if (local->xattr_req) { ++ dict_del(local->xattr_req, TIER_LINKFILE_GFID); ++ } ++ } ++ ++ DHT_STRIP_PHASE1_FLAGS(stbuf); ++ ++ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ ++ return 0; ++} ++ ++int ++tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *cached_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ unsigned char *gfid = NULL; ++ ++ local = frame->local; ++ if (!local) { ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ conf = this->private; ++ if (!conf) { ++ local->op_errno = EINVAL; ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ cached_subvol = TIER_UNHASHED_SUBVOL; ++ ++ if (local->params) { ++ dict_del(local->params, conf->link_xattr_name); ++ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); ++ } ++ ++ /* ++ * We will delete the linkfile if data file creation fails. 
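++ * (The actual cleanup is wound from tier_create_cbk above, which
++ * unlinks the just-created linkto file on the hashed, i.e. cold,
++ * subvol through tier_create_unlink_stale_linkto_cbk when the
++ * data-file create fails.)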
++ * When deleting this stale linkfile, there is a possibility ++ * for a race between this linkfile deletion and a stale ++ * linkfile deletion triggered by another lookup from different ++ * client. ++ * ++ * For eg: ++ * ++ * Client 1 Client 2 ++ * ++ * 1 linkfile created for foo ++ * ++ * 2 data file creation failed ++ * ++ * 3 creating a file with same name ++ * ++ * 4 lookup before creation deleted ++ * the linkfile created by client1 ++ * considering as a stale linkfile. ++ * ++ * 5 New linkfile created for foo ++ * with different gfid. ++ * ++ * 6 Trigger linkfile deletion as ++ * data file creation failed. ++ * ++ * 7 Linkfile deleted which is ++ * created by client2. ++ * ++ * 8 Data file created. ++ * ++ * With this race, we will end up having a file in a non-hashed subvol ++ * without a linkfile in hashed subvol. ++ * ++ * To avoid this, we store the gfid of linkfile created by client, So ++ * If we delete the linkfile , we validate gfid of existing file with ++ * stored value from posix layer. ++ * ++ * Storing this value in local->xattr_req as local->params was also used ++ * to create the data file. During the linkfile deletion we will use ++ * local->xattr_req dictionary. ++ */ ++ if (!local->xattr_req) { ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ local->op_errno = ENOMEM; ++ op_errno = ENOMEM; ++ goto err; ++ } ++ } ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ local->op_errno = ENOMEM; ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ gf_uuid_copy(gfid, stbuf->ia_gfid); ++ ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid, ++ sizeof(uuid_t)); ++ if (ret) { ++ GF_FREE(gfid); ++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value" ++ " : key = %s", ++ TIER_LINKFILE_GFID); ++ } ++ ++ STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol, ++ cached_subvol->fops->create, &local->loc, local->flags, ++ local->mode, local->umask, local->fd, local->params); ++ ++ return 0; ++err: ++ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, ++ NULL); ++ return 0; ++} ++ ++gf_boolean_t ++tier_is_hot_tier_decommissioned(xlator_t *this) ++{ ++ dht_conf_t *conf = NULL; ++ xlator_t *hot_tier = NULL; ++ int i = 0; ++ ++ conf = this->private; ++ hot_tier = conf->subvolumes[1]; ++ ++ if (conf->decommission_subvols_cnt) { ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (conf->decommissioned_bricks[i] && ++ conf->decommissioned_bricks[i] == hot_tier) ++ return _gf_true; ++ } ++ } ++ ++ return _gf_false; ++} ++ ++int ++tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *params) ++{ ++ int op_errno = -1; ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ xlator_t *hot_subvol = NULL; ++ xlator_t *cold_subvol = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ ++ conf = this->private; ++ ++ dht_get_du_info(frame, this, loc); ++ ++ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ cold_subvol = TIER_HASHED_SUBVOL; ++ hot_subvol = TIER_UNHASHED_SUBVOL; ++ ++ if (conf->subvolumes[0] != cold_subvol) { ++ hot_subvol = conf->subvolumes[0]; ++ } ++ /* ++ * if hot tier full, write to cold. 
++ * Also if hot tier is full, create in cold ++ */ ++ if (dht_is_subvol_filled(this, hot_subvol) || ++ tier_is_hot_tier_decommissioned(this)) { ++ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, ++ cold_subvol->name); ++ ++ STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol, ++ cold_subvol->fops->create, loc, flags, mode, umask, ++ fd, params); ++ } else { ++ local->params = dict_ref(params); ++ local->flags = flags; ++ local->mode = mode; ++ local->umask = umask; ++ local->cached_subvol = hot_subvol; ++ local->hashed_subvol = cold_subvol; ++ ++ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path, ++ hot_subvol->name, cold_subvol->name); ++ ++ dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this, ++ hot_subvol, cold_subvol, loc); ++ ++ goto out; ++ } ++out: ++ return 0; ++ ++err: ++ ++ op_errno = (op_errno == -1) ? errno : op_errno; ++ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, ++ NULL); ++ ++ return 0; ++} ++ ++int ++tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ ++ LOCK(&frame->lock); ++ { ++ if ((op_ret == -1) && (op_errno != ENOENT)) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ gf_msg_debug(this->name, op_errno, ++ "Unlink link: subvolume %s" ++ " returned -1", ++ prev->name); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ if (local->op_ret == -1) ++ goto err; ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, NULL); ++ ++ return 0; ++ ++err: ++ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, inode_t *inode, ++ struct iatt *preparent, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ dht_conf_t *conf = NULL; ++ xlator_t *hot_subvol = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ hot_subvol = TIER_UNHASHED_SUBVOL; ++ ++ if (!op_ret) { ++ /* ++ * linkfile present on hot tier. 
unlinking the linkfile ++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol, ++ hot_subvol, hot_subvol->fops->unlink, &local->loc, ++ local->flags, NULL); ++ return 0; ++ } ++ ++ LOCK(&frame->lock); ++ { ++ if (op_errno == ENOENT) { ++ local->op_ret = 0; ++ local->op_errno = op_errno; ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1", ++ prev->name); ++ } ++ ++ UNLOCK(&frame->lock); ++ ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++} ++ ++int ++tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int op_ret, int op_errno, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ ++ LOCK(&frame->lock); ++ { ++ /* Ignore EINVAL for tier to ignore error when the file ++ does not exist on the other tier */ ++ if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) { ++ local->op_errno = op_errno; ++ local->op_ret = op_ret; ++ gf_msg_debug(this->name, op_errno, ++ "Unlink link: subvolume %s" ++ " returned -1", ++ prev->name); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ if (local->op_ret == -1) ++ goto err; ++ ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++ ++err: ++ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); ++ return 0; ++} ++ ++int32_t ++tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ xlator_t *prev = NULL; ++ struct iatt *stbuf = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ xlator_t *hot_tier = NULL; ++ xlator_t *cold_tier = NULL; ++ ++ local = frame->local; ++ prev = cookie; ++ conf = this->private; ++ ++ cold_tier = TIER_HASHED_SUBVOL; ++ hot_tier = TIER_UNHASHED_SUBVOL; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret == -1) { ++ if (op_errno == ENOENT) { ++ local->op_ret = 0; ++ } else { ++ local->op_ret = -1; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, op_errno, ++ "Unlink: subvolume %s returned -1" ++ " with errno = %d", ++ prev->name, op_errno); ++ goto unlock; ++ } ++ ++ local->op_ret = 0; ++ ++ local->postparent = *postparent; ++ local->preparent = *preparent; ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update(local->loc.parent, this, ++ &local->preparent, 0); ++ dht_inode_ctx_time_update(local->loc.parent, this, ++ &local->postparent, 1); ++ } ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ if (local->op_ret) ++ goto out; ++ ++ if (cold_tier != local->cached_subvol) { ++ /* ++ * File is present in hot tier, so there will be ++ * a link file on cold tier, deleting the linkfile ++ * from cold tier ++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier, ++ cold_tier->fops->unlink, &local->loc, local->flags, ++ xdata); ++ return 0; ++ } ++ ++ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); ++ if (!ret && stbuf && ++ ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) { ++ /* ++ * File is migrating from cold to hot tier. ++ * Delete the destination linkfile. 
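++ * (The migration phase is inferred from the iatt returned via
++ * DHT_IATT_IN_XDATA_KEY; per the usual DHT rebalance convention,
++ * phase 1 marks a file whose data is still being copied with the
++ * sticky and set-gid mode bits, while phase 2 means the source
++ * has already been converted to a linkto file.)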
++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier, ++ hot_tier->fops->lookup, &local->loc, NULL); ++ return 0; ++ } ++ ++out: ++ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preparent, &local->postparent, xdata); ++ ++ return 0; ++} ++ ++int ++tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) ++{ ++ xlator_t *cached_subvol = NULL; ++ xlator_t *hashed_subvol = NULL; ++ dht_conf_t *conf = NULL; ++ int op_errno = -1; ++ dht_local_t *local = NULL; ++ int ret = -1; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK); ++ if (!local) { ++ op_errno = ENOMEM; ++ ++ goto err; ++ } ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ cached_subvol = local->cached_subvol; ++ if (!cached_subvol) { ++ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", ++ loc->path); ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ local->flags = xflag; ++ if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) { ++ /* ++ * File resides in cold tier. We need to stat ++ * the file to see if it is being promoted. ++ * If yes we need to delete the destination ++ * file as well. ++ * ++ * Currently we are doing this check only for ++ * regular files. ++ */ ++ xdata = xdata ? dict_ref(xdata) : dict_new(); ++ if (xdata) { ++ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s", ++ DHT_IATT_IN_XDATA_KEY); ++ } ++ } ++ } ++ ++ /* ++ * File is on hot tier, delete the data file first, then ++ * linkfile from cold. ++ */ ++ STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol, ++ cached_subvol->fops->unlink, loc, xflag, xdata); ++ if (xdata) ++ dict_unref(xdata); ++ return 0; ++err: ++ op_errno = (op_errno == -1) ? 
errno : op_errno; ++ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); ++ ++ return 0; ++} ++ ++int ++tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) ++{ ++ gf_dirent_t entries; ++ gf_dirent_t *orig_entry = NULL; ++ gf_dirent_t *entry = NULL; ++ int count = 0; ++ ++ INIT_LIST_HEAD(&entries.list); ++ ++ if (op_ret < 0) ++ goto unwind; ++ ++ list_for_each_entry(orig_entry, (&orig_entries->list), list) ++ { ++ entry = gf_dirent_for_name(orig_entry->d_name); ++ if (!entry) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, ++ "Memory allocation failed "); ++ goto unwind; ++ } ++ ++ entry->d_off = orig_entry->d_off; ++ entry->d_ino = orig_entry->d_ino; ++ entry->d_type = orig_entry->d_type; ++ entry->d_len = orig_entry->d_len; ++ ++ list_add_tail(&entry->list, &entries.list); ++ count++; ++ } ++ op_ret = count; ++ ++unwind: ++ if (op_ret < 0) ++ op_ret = 0; ++ ++ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); ++ ++ gf_dirent_free(&entries); ++ ++ return 0; ++} ++ ++int ++tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *orig_entry = NULL; ++ gf_dirent_t *entry = NULL; ++ xlator_t *prev = NULL; ++ xlator_t *next_subvol = NULL; ++ off_t next_offset = 0; ++ int count = 0; ++ dht_conf_t *conf = NULL; ++ int ret = 0; ++ inode_table_t *itable = NULL; ++ inode_t *inode = NULL; ++ ++ INIT_LIST_HEAD(&entries.list); ++ prev = cookie; ++ local = frame->local; ++ itable = local->fd ? local->fd->inode->table : NULL; ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, unwind); ++ ++ if (op_ret < 0) ++ goto done; ++ ++ list_for_each_entry(orig_entry, (&orig_entries->list), list) ++ { ++ next_offset = orig_entry->d_off; ++ ++ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { ++ /*stat failed somewhere- ignore this entry*/ ++ continue; ++ } ++ ++ entry = gf_dirent_for_name(orig_entry->d_name); ++ if (!entry) { ++ goto unwind; ++ } ++ ++ entry->d_off = orig_entry->d_off; ++ entry->d_stat = orig_entry->d_stat; ++ entry->d_ino = orig_entry->d_ino; ++ entry->d_type = orig_entry->d_type; ++ entry->d_len = orig_entry->d_len; ++ ++ if (orig_entry->dict) ++ entry->dict = dict_ref(orig_entry->dict); ++ ++ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, ++ conf->link_xattr_name)) { ++ goto entries; ++ ++ } else if (IA_ISDIR(entry->d_stat.ia_type)) { ++ if (orig_entry->inode) { ++ dht_inode_ctx_time_update(orig_entry->inode, this, ++ &entry->d_stat, 1); ++ } ++ } else { ++ if (orig_entry->inode) { ++ ret = dht_layout_preset(this, prev, orig_entry->inode); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_LAYOUT_SET_FAILED, ++ "failed to link the layout " ++ "in inode"); ++ ++ entry->inode = inode_ref(orig_entry->inode); ++ } else if (itable) { ++ /* ++ * orig_entry->inode might be null if any upper ++ * layer xlators below client set to null, to ++ * force a lookup on the inode even if the inode ++ * is present in the inode table. In that case ++ * we just update the ctx to make sure we didn't ++ * missed anything. 
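++ * (inode_find() below looks the inode up in the fd's inode table
++ * by gfid; when it is already cached, the tier layout can be
++ * preset on it without forcing a fresh lookup over the wire.)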
++ */ ++ inode = inode_find(itable, orig_entry->d_stat.ia_gfid); ++ if (inode) { ++ ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, ++ DHT_MSG_LAYOUT_SET_FAILED, ++ "failed to link the layout" ++ " in inode"); ++ inode_unref(inode); ++ inode = NULL; ++ } ++ } ++ } ++ ++ entries: ++ list_add_tail(&entry->list, &entries.list); ++ count++; ++ } ++ op_ret = count; ++ ++done: ++ if (count == 0) { ++ /* non-zero next_offset means that ++ EOF is not yet hit on the current subvol ++ */ ++ if (next_offset != 0) { ++ next_subvol = prev; ++ } else { ++ goto unwind; ++ } ++ ++ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol, ++ next_subvol->fops->readdirp, local->fd, local->size, ++ next_offset, local->xattr); ++ return 0; ++ } ++ ++unwind: ++ if (op_ret < 0) ++ op_ret = 0; ++ ++ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); ++ ++ gf_dirent_free(&entries); ++ ++ return 0; ++} ++ ++int ++tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, int whichop, dict_t *dict) ++{ ++ dht_local_t *local = NULL; ++ int op_errno = -1; ++ xlator_t *hashed_subvol = NULL; ++ int ret = 0; ++ dht_conf_t *conf = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(fd, err); ++ VALIDATE_OR_GOTO(this->private, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, NULL, NULL, whichop); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ local->fd = fd_ref(fd); ++ local->size = size; ++ local->xattr_req = (dict) ? dict_ref(dict) : NULL; ++ ++ hashed_subvol = TIER_HASHED_SUBVOL; ++ ++ /* TODO: do proper readdir */ ++ if (whichop == GF_FOP_READDIRP) { ++ if (dict) ++ local->xattr = dict_ref(dict); ++ else ++ local->xattr = dict_new(); ++ ++ if (local->xattr) { ++ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set dictionary value" ++ " : key = %s", ++ conf->link_xattr_name); ++ } ++ ++ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol, ++ hashed_subvol, hashed_subvol->fops->readdirp, fd, ++ size, yoff, local->xattr); ++ ++ } else { ++ STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol, ++ hashed_subvol->fops->readdir, fd, size, yoff, ++ local->xattr); ++ } ++ ++ return 0; ++ ++err: ++ op_errno = (op_errno == -1) ? 
errno : op_errno; ++ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); ++ ++ return 0; ++} ++ ++int ++tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, dict_t *xdata) ++{ ++ int op = GF_FOP_READDIR; ++ dht_conf_t *conf = NULL; ++ int i = 0; ++ ++ conf = this->private; ++ if (!conf) ++ goto out; ++ ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (!conf->subvolume_status[i]) { ++ op = GF_FOP_READDIRP; ++ break; ++ } ++ } ++ ++ if (conf->use_readdirp) ++ op = GF_FOP_READDIRP; ++ ++out: ++ tier_do_readdir(frame, this, fd, size, yoff, op, 0); ++ return 0; ++} ++ ++int ++tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, dict_t *dict) ++{ ++ tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); ++ return 0; ++} ++ ++int ++tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, struct statvfs *statvfs, dict_t *xdata) ++{ ++ gf_boolean_t event = _gf_false; ++ qdstatfs_action_t action = qdstatfs_action_OFF; ++ dht_local_t *local = NULL; ++ int this_call_cnt = 0; ++ int bsize = 0; ++ int frsize = 0; ++ GF_UNUSED int ret = 0; ++ unsigned long new_usage = 0; ++ unsigned long cur_usage = 0; ++ xlator_t *prev = NULL; ++ dht_conf_t *conf = NULL; ++ tier_statvfs_t *tier_stat = NULL; ++ ++ prev = cookie; ++ local = frame->local; ++ GF_ASSERT(local); ++ ++ conf = this->private; ++ ++ if (xdata) ++ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event); ++ ++ tier_stat = &local->tier_statvfs; ++ ++ LOCK(&frame->lock); ++ { ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ goto unlock; ++ } ++ if (!statvfs) { ++ op_errno = EINVAL; ++ local->op_ret = -1; ++ goto unlock; ++ } ++ local->op_ret = 0; ++ ++ if (local->quota_deem_statfs) { ++ if (event == _gf_true) { ++ action = qdstatfs_action_COMPARE; ++ } else { ++ action = qdstatfs_action_NEGLECT; ++ } ++ } else { ++ if (event == _gf_true) { ++ action = qdstatfs_action_REPLACE; ++ local->quota_deem_statfs = _gf_true; ++ } ++ } ++ ++ if (local->quota_deem_statfs) { ++ switch (action) { ++ case qdstatfs_action_NEGLECT: ++ goto unlock; ++ ++ case qdstatfs_action_REPLACE: ++ local->statvfs = *statvfs; ++ goto unlock; ++ ++ case qdstatfs_action_COMPARE: ++ new_usage = statvfs->f_blocks - statvfs->f_bfree; ++ cur_usage = local->statvfs.f_blocks - ++ local->statvfs.f_bfree; ++ ++ /* Take the max of the usage from subvols */ ++ if (new_usage >= cur_usage) ++ local->statvfs = *statvfs; ++ goto unlock; ++ ++ default: ++ break; ++ } ++ } ++ ++ if (local->statvfs.f_bsize != 0) { ++ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); ++ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); ++ dht_normalize_stats(&local->statvfs, bsize, frsize); ++ dht_normalize_stats(statvfs, bsize, frsize); ++ } else { ++ local->statvfs.f_bsize = statvfs->f_bsize; ++ local->statvfs.f_frsize = statvfs->f_frsize; ++ } ++ ++ if (prev == TIER_HASHED_SUBVOL) { ++ local->statvfs.f_blocks = statvfs->f_blocks; ++ local->statvfs.f_files = statvfs->f_files; ++ local->statvfs.f_fsid = statvfs->f_fsid; ++ local->statvfs.f_flag = statvfs->f_flag; ++ local->statvfs.f_namemax = statvfs->f_namemax; ++ tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree); ++ tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail); ++ tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree); ++ tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail); ++ tier_stat->hashed_fsid = statvfs->f_fsid; ++ } else { ++ 
tier_stat->unhashed_fsid = statvfs->f_fsid; ++ tier_stat->unhashed_blocks_used = (statvfs->f_blocks - ++ statvfs->f_bfree); ++ tier_stat->unhashed_pblocks_used = (statvfs->f_blocks - ++ statvfs->f_bavail); ++ tier_stat->unhashed_files_used = (statvfs->f_files - ++ statvfs->f_ffree); ++ tier_stat->unhashed_pfiles_used = (statvfs->f_files - ++ statvfs->f_favail); ++ } ++ } ++unlock: ++ UNLOCK(&frame->lock); ++ ++ this_call_cnt = dht_frame_return(frame); ++ if (is_last_call(this_call_cnt)) { ++ if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) { ++ tier_stat->blocks_used += tier_stat->unhashed_blocks_used; ++ tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used; ++ tier_stat->files_used += tier_stat->unhashed_files_used; ++ tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used; ++ } ++ local->statvfs.f_bfree = local->statvfs.f_blocks - ++ tier_stat->blocks_used; ++ local->statvfs.f_bavail = local->statvfs.f_blocks - ++ tier_stat->pblocks_used; ++ local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used; ++ local->statvfs.f_favail = local->statvfs.f_files - ++ tier_stat->pfiles_used; ++ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, ++ &local->statvfs, xdata); ++ } ++ ++ return 0; ++} ++ ++int ++tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) ++{ ++ dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; ++ int op_errno = -1; ++ int i = -1; ++ inode_t *inode = NULL; ++ inode_table_t *itable = NULL; ++ uuid_t root_gfid = { ++ 0, ++ }; ++ loc_t newloc = { ++ 0, ++ }; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ VALIDATE_OR_GOTO(this->private, err); ++ ++ conf = this->private; ++ ++ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS); ++ if (!local) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) { ++ itable = loc->inode->table; ++ if (!itable) { ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ loc = &local->loc2; ++ root_gfid[15] = 1; ++ ++ inode = inode_find(itable, root_gfid); ++ if (!inode) { ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ dht_build_root_loc(inode, &newloc); ++ loc = &newloc; ++ } ++ ++ local->call_cnt = conf->subvolume_cnt; ++ ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i], ++ conf->subvolumes[i], ++ conf->subvolumes[i]->fops->statfs, loc, xdata); ++ } ++ ++ return 0; ++ ++err: ++ op_errno = (op_errno == -1) ? errno : op_errno; ++ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); ++ ++ return 0; ++} +diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h +new file mode 100644 +index 0000000..b1ebaa8 +--- /dev/null ++++ b/xlators/cluster/dht/src/tier-common.h +@@ -0,0 +1,55 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#ifndef _TIER_COMMON_H_ ++#define _TIER_COMMON_H_ ++/* Function definitions */ ++int ++tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata); ++ ++int ++tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *params); ++ ++int32_t ++tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata); ++ ++int32_t ++tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t off, dict_t *dict); ++ ++int ++tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t yoff, dict_t *xdata); ++ ++int ++tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata); ++ ++int ++tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); ++ ++#endif +diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c +new file mode 100644 +index 0000000..94b4c63 +--- /dev/null ++++ b/xlators/cluster/dht/src/tier.c +@@ -0,0 +1,3105 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#include <dlfcn.h> ++ ++#include "dht-common.h" ++#include "tier.h" ++#include "tier-common.h" ++#include <glusterfs/syscall.h> ++#include <glusterfs/events.h> ++#include "tier-ctr-interface.h" ++ ++/*Hard coded DB info*/ ++static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3; ++/*Hard coded DB info*/ ++ ++/*Mutex for updating the data movement stats*/ ++static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++/* Stores the path location of promotion query files */ ++static char *promotion_qfile; ++/* Stores the path location of demotion query files */ ++static char *demotion_qfile; ++ ++static void *libhandle; ++static gfdb_methods_t gfdb_methods; ++ ++#define DB_QUERY_RECORD_SIZE 4096 ++ ++/* ++ * Closes all the fds and frees the qfile_array ++ * */ ++static void ++qfile_array_free(tier_qfile_array_t *qfile_array) ++{ ++ ssize_t i = 0; ++ ++ if (qfile_array) { ++ if (qfile_array->fd_array) { ++ for (i = 0; i < qfile_array->array_size; i++) { ++ if (qfile_array->fd_array[i] != -1) { ++ sys_close(qfile_array->fd_array[i]); ++ } ++ } ++ } ++ GF_FREE(qfile_array->fd_array); ++ } ++ GF_FREE(qfile_array); ++} ++ ++/* Create a new query file list with given size */ ++static tier_qfile_array_t * ++qfile_array_new(ssize_t array_size) ++{ ++ int ret = -1; ++ tier_qfile_array_t *qfile_array = NULL; ++ ssize_t i = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out); ++ ++ qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t), ++ gf_tier_mt_qfile_array_t); ++ if (!qfile_array) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to allocate memory for tier_qfile_array_t"); ++ goto out; ++ } ++ ++ qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int), ++ gf_dht_mt_int32_t); ++ if (!qfile_array->fd_array) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to allocate memory for " ++ "tier_qfile_array_t->fd_array"); ++ goto out; ++ } ++ ++ /* Init all the fds to -1 */ ++ for (i = 0; i < array_size; i++) { ++ qfile_array->fd_array[i] = -1; ++ } ++ ++ qfile_array->array_size = array_size; ++ qfile_array->next_index = 0; ++ ++ /* Set exhausted count to list size as the list is empty */ ++ qfile_array->exhausted_count = qfile_array->array_size; ++ ++ ret = 0; ++out: ++ if (ret) { ++ qfile_array_free(qfile_array); ++ qfile_array = NULL; ++ } ++ return qfile_array; ++} ++ ++/* Checks if the query file list is empty or totally exhausted. */ ++static gf_boolean_t ++is_qfile_array_empty(tier_qfile_array_t *qfile_array) ++{ ++ return (qfile_array->exhausted_count == qfile_array->array_size) ++ ? _gf_true ++ : _gf_false; ++} ++ ++/* Shifts the next_fd pointer to the next available fd in the list */ ++static void ++shift_next_index(tier_qfile_array_t *qfile_array) ++{ ++ int qfile_fd = 0; ++ int spin_count = 0; ++ ++ if (is_qfile_array_empty(qfile_array)) { ++ return; ++ } ++ ++ do { ++ /* change next_index in a rotional manner */ ++ (qfile_array->next_index == (qfile_array->array_size - 1)) ++ ? qfile_array->next_index = 0 ++ : qfile_array->next_index++; ++ ++ qfile_fd = (qfile_array->fd_array[qfile_array->next_index]); ++ ++ spin_count++; ++ ++ } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size)); ++} ++ ++/* ++ * This is a non-thread safe function to read query records ++ * from a list of query files in a Round-Robin manner. ++ * As in when the query files get exhuasted they are closed. ++ * Returns: ++ * 0 if all the query records in all the query files of the list are ++ * exhausted. 
++ * > 0 if a query record is successfully read. Indicates the size of the query ++ * record read. ++ * < 0 if there was failure ++ * */ ++static int ++read_query_record_list(tier_qfile_array_t *qfile_array, ++ gfdb_query_record_t **query_record) ++{ ++ int ret = -1; ++ int qfile_fd = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", qfile_array, out); ++ GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out); ++ ++ do { ++ if (is_qfile_array_empty(qfile_array)) { ++ ret = 0; ++ break; ++ } ++ ++ qfile_fd = qfile_array->fd_array[qfile_array->next_index]; ++ ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record); ++ if (ret <= 0) { ++ /*The qfile_fd has reached EOF or ++ * there was an error. ++ * 1. Close the exhausted fd ++ * 2. increment the exhausted count ++ * 3. shift next_qfile to next qfile ++ **/ ++ sys_close(qfile_fd); ++ qfile_array->fd_array[qfile_array->next_index] = -1; ++ qfile_array->exhausted_count++; ++ /* shift next_qfile to next qfile */ ++ shift_next_index(qfile_array); ++ continue; ++ } else { ++ /* shift next_qfile to next qfile */ ++ shift_next_index(qfile_array); ++ break; ++ } ++ } while (1); ++out: ++ return ret; ++} ++ ++/* Check and update the watermark every WM_INTERVAL seconds */ ++#define WM_INTERVAL 5 ++#define WM_INTERVAL_EMERG 1 ++ ++static int ++tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) ++{ ++ int ret = -1; ++ dict_t *dict = NULL; ++ char *uuid_str = NULL; ++ uuid_t node_uuid = { ++ 0, ++ }; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, loc, out); ++ GF_VALIDATE_OR_GOTO(this->name, defrag, out); ++ ++ if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path); ++ goto out; ++ } ++ ++ if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get node-uuids for %s", loc->path); ++ goto out; ++ } ++ ++ if (gf_uuid_parse(uuid_str, node_uuid)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "uuid_parse failed for %s", loc->path); ++ goto out; ++ } ++ ++ if (gf_uuid_compare(node_uuid, defrag->node_uuid)) { ++ gf_msg_debug(this->name, 0, "%s does not belong to this node", ++ loc->path); ++ ret = 1; ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (dict) ++ dict_unref(dict); ++ ++ return ret; ++} ++ ++int ++tier_get_fs_stat(xlator_t *this, loc_t *root_loc) ++{ ++ int ret = 0; ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ dict_t *xdata = NULL; ++ struct statvfs statfs = { ++ 0, ++ }; ++ gf_tier_conf_t *tier_conf = NULL; ++ ++ conf = this->private; ++ if (!conf) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "conf is NULL"); ++ ret = -1; ++ goto exit; ++ } ++ ++ defrag = conf->defrag; ++ if (!defrag) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "defrag is NULL"); ++ ret = -1; ++ goto exit; ++ } ++ ++ tier_conf = &defrag->tier_conf; ++ ++ xdata = dict_new(); ++ if (!xdata) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, ++ "failed to allocate dictionary"); ++ ret = -1; ++ goto exit; ++ } ++ ++ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, ++ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); ++ ret = -1; ++ goto exit; ++ } ++ ++ /* Find how much free space is on the hot 
subvolume. ++ * Then see if that value */ ++ /* is less than or greater than user defined watermarks. ++ * Stash results in */ ++ /* the tier_conf data structure. */ ++ ++ ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS, ++ "Unable to obtain statfs."); ++ goto exit; ++ } ++ ++ pthread_mutex_lock(&dm_stat_mutex); ++ ++ tier_conf->block_size = statfs.f_bsize; ++ tier_conf->blocks_total = statfs.f_blocks; ++ tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; ++ ++ tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, ++ statfs.f_blocks); ++ pthread_mutex_unlock(&dm_stat_mutex); ++ ++exit: ++ if (xdata) ++ dict_unref(xdata); ++ return ret; ++} ++ ++static void ++tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm, ++ tier_watermark_op_t new_wm) ++{ ++ if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) { ++ if (new_wm == TIER_WM_MID) { ++ gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname); ++ } else if (new_wm == TIER_WM_HI) { ++ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); ++ } ++ } else if (old_wm == TIER_WM_MID) { ++ if (new_wm == TIER_WM_LOW) { ++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); ++ } else if (new_wm == TIER_WM_HI) { ++ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); ++ } ++ } else if (old_wm == TIER_WM_HI) { ++ if (new_wm == TIER_WM_MID) { ++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname); ++ } else if (new_wm == TIER_WM_LOW) { ++ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); ++ } ++ } ++} ++ ++int ++tier_check_watermark(xlator_t *this) ++{ ++ int ret = -1; ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ gf_tier_conf_t *tier_conf = NULL; ++ tier_watermark_op_t wm = TIER_WM_NONE; ++ ++ conf = this->private; ++ if (!conf) ++ goto exit; ++ ++ defrag = conf->defrag; ++ if (!defrag) ++ goto exit; ++ ++ tier_conf = &defrag->tier_conf; ++ ++ if (tier_conf->percent_full < tier_conf->watermark_low) { ++ wm = TIER_WM_LOW; ++ ++ } else if (tier_conf->percent_full < tier_conf->watermark_hi) { ++ wm = TIER_WM_MID; ++ ++ } else { ++ wm = TIER_WM_HI; ++ } ++ ++ if (wm != tier_conf->watermark_last) { ++ tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last, ++ wm); ++ ++ tier_conf->watermark_last = wm; ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Tier watermark now %d", wm); ++ } ++ ++ ret = 0; ++ ++exit: ++ return ret; ++} ++ ++static gf_boolean_t ++is_hot_tier_full(gf_tier_conf_t *tier_conf) ++{ ++ if (tier_conf && (tier_conf->mode == TIER_MODE_WM) && ++ (tier_conf->watermark_last == TIER_WM_HI)) ++ return _gf_true; ++ ++ return _gf_false; ++} ++ ++int ++tier_do_migration(xlator_t *this, int promote) ++{ ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ long rand = 0; ++ int migrate = 0; ++ gf_tier_conf_t *tier_conf = NULL; ++ ++ conf = this->private; ++ if (!conf) ++ goto exit; ++ ++ defrag = conf->defrag; ++ if (!defrag) ++ goto exit; ++ ++ if (tier_check_watermark(this) != 0) { ++ gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get watermark"); ++ goto exit; ++ } ++ ++ tier_conf = &defrag->tier_conf; ++ ++ switch (tier_conf->watermark_last) { ++ case TIER_WM_LOW: ++ migrate = promote ? 1 : 0; ++ break; ++ case TIER_WM_HI: ++ migrate = promote ? 
0 : 1; ++ break; ++ case TIER_WM_MID: ++ /* coverity[DC.WEAK_CRYPTO] */ ++ rand = random() % 100; ++ if (promote) { ++ migrate = (rand > tier_conf->percent_full); ++ } else { ++ migrate = (rand <= tier_conf->percent_full); ++ } ++ break; ++ } ++ ++exit: ++ return migrate; ++} ++ ++int ++tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc, ++ gf_tier_conf_t *tier_conf) ++{ ++ int ret = -1; ++ ++ pthread_mutex_lock(&tier_conf->pause_mutex); ++ if (is_promotion) ++ tier_conf->promote_in_progress = 1; ++ else ++ tier_conf->demote_in_progress = 1; ++ pthread_mutex_unlock(&tier_conf->pause_mutex); ++ ++ /* Data migration */ ++ ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL); ++ ++ pthread_mutex_lock(&tier_conf->pause_mutex); ++ if (is_promotion) ++ tier_conf->promote_in_progress = 0; ++ else ++ tier_conf->demote_in_progress = 0; ++ pthread_mutex_unlock(&tier_conf->pause_mutex); ++ ++ return ret; ++} ++ ++/* returns _gf_true: if file can be promoted ++ * returns _gf_false: if file cannot be promoted ++ */ ++static gf_boolean_t ++tier_can_promote_file(xlator_t *this, char const *file_name, ++ struct iatt *current, gf_defrag_info_t *defrag) ++{ ++ gf_boolean_t ret = _gf_false; ++ fsblkcnt_t estimated_usage = 0; ++ ++ if (defrag->tier_conf.tier_max_promote_size && ++ (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "File %s (gfid:%s) with size (%" PRIu64 ++ ") exceeds maxsize " ++ "(%d) for promotion. File will not be promoted.", ++ file_name, uuid_utoa(current->ia_gfid), current->ia_size, ++ defrag->tier_conf.tier_max_promote_size); ++ goto err; ++ } ++ ++ /* bypass further validations for TEST mode */ ++ if (defrag->tier_conf.mode != TIER_MODE_WM) { ++ ret = _gf_true; ++ goto err; ++ } ++ ++ /* convert the file size to blocks as per the block size of the ++ * destination tier ++ * NOTE: add (block_size - 1) to get the correct block size when ++ * there is a remainder after a modulo ++ */ ++ estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / ++ defrag->tier_conf.block_size) + ++ defrag->tier_conf.blocks_used; ++ ++ /* test if the estimated block usage goes above HI watermark */ ++ if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >= ++ defrag->tier_conf.watermark_hi) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Estimated block count consumption on " ++ "hot tier (%" PRIu64 ++ ") exceeds hi watermark (%d%%). " ++ "File will not be promoted.", ++ estimated_usage, defrag->tier_conf.watermark_hi); ++ goto err; ++ } ++ ret = _gf_true; ++err: ++ return ret; ++} ++ ++static int ++tier_set_migrate_data(dict_t *migrate_data) ++{ ++ int failed = 1; ++ ++ failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force"); ++ if (failed) { ++ goto bail_out; ++ } ++ ++ /* Flag to suggest the xattr call is from migrator */ ++ failed = dict_set_str(migrate_data, "from.migrator", "yes"); ++ if (failed) { ++ goto bail_out; ++ } ++ ++ /* Flag to suggest its a tiering migration ++ * The reason for this dic key-value is that ++ * promotions and demotions are multithreaded ++ * so the original frame from gf_defrag_start() ++ * is not carried. A new frame will be created when ++ * we do syncop_setxattr(). This does not have the ++ * frame->root->pid of the original frame. 
So we pass ++ * this dic key-value when we do syncop_setxattr() to do ++ * data migration and set the frame->root->pid to ++ * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before ++ * calling dht_start_rebalance_task() */ ++ failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes"); ++ if (failed) { ++ goto bail_out; ++ } ++ ++ failed = 0; ++ ++bail_out: ++ return failed; ++} ++ ++static char * ++tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf, ++ int *per_link_status) ++{ ++ int ret = -1; ++ char *parent_path = NULL; ++ dict_t *xdata_request = NULL; ++ dict_t *xdata_response = NULL; ++ ++ xdata_request = dict_new(); ++ if (!xdata_request) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to create xdata_request dict"); ++ goto err; ++ } ++ ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to set value to dict : key %s \n", ++ GET_ANCESTRY_PATH_KEY); ++ goto err; ++ } ++ ++ ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request, ++ &xdata_response); ++ /* When the parent gfid is a stale entry, the lookup ++ * will fail and stop the demotion process. ++ * The parent gfid can be stale when a huge folder is ++ * deleted while the files within it are being migrated ++ */ ++ if (ret == -ESTALE) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, ++ "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = 1; ++ goto err; ++ } else if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, ++ "Error in parent lookup for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path); ++ if (ret || !parent_path) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get parent path for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = -1; ++ goto err; ++ } ++ ++err: ++ if (xdata_request) { ++ dict_unref(xdata_request); ++ } ++ ++ if (xdata_response) { ++ dict_unref(xdata_response); ++ xdata_response = NULL; ++ } ++ ++ return parent_path; ++} ++ ++static int ++tier_get_file_name_and_path(xlator_t *this, uuid_t gfid, ++ gfdb_link_info_t *link_info, ++ char const *parent_path, loc_t *loc, ++ int *per_link_status) ++{ ++ int ret = -1; ++ ++ loc->name = gf_strdup(link_info->file_name); ++ if (!loc->name) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Memory " ++ "allocation failed for %s", ++ uuid_utoa(gfid)); ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to " ++ "construct file path for %s %s\n", ++ parent_path, loc->name); ++ *per_link_status = -1; ++ goto err; ++ } ++ ++ ret = 0; ++ ++err: ++ return ret; ++} ++ ++static int ++tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current, ++ int *per_link_status) ++{ ++ int ret = -1; ++ ++ ret = syncop_lookup(this, loc, current, NULL, NULL, NULL); ++ ++ /* The file may be deleted even when the parent ++ * is available and the lookup will ++ * return a stale entry which would stop the ++ * migration. so if its a stale entry, then skip ++ * the file and keep migrating. 
++ */ ++ if (ret == -ESTALE) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, ++ "Stale lookup for %s", uuid_utoa(p_loc->gfid)); ++ *per_link_status = 1; ++ goto err; ++ } else if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to " ++ "lookup file %s\n", ++ loc->name); ++ *per_link_status = -1; ++ goto err; ++ } ++ ret = 0; ++ ++err: ++ return ret; ++} ++ ++static gf_boolean_t ++tier_is_file_already_at_destination(xlator_t *src_subvol, ++ query_cbk_args_t *query_cbk_args, ++ dht_conf_t *conf, int *per_link_status) ++{ ++ gf_boolean_t at_destination = _gf_true; ++ ++ if (src_subvol == NULL) { ++ *per_link_status = 1; ++ goto err; ++ } ++ if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) { ++ *per_link_status = 1; ++ goto err; ++ } ++ ++ if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) { ++ *per_link_status = 1; ++ goto err; ++ } ++ at_destination = _gf_false; ++ ++err: ++ return at_destination; ++} ++ ++static void ++tier_update_migration_counters(query_cbk_args_t *query_cbk_args, ++ gf_defrag_info_t *defrag, ++ uint64_t *total_migrated_bytes, int *total_files) ++{ ++ if (query_cbk_args->is_promotion) { ++ defrag->total_files_promoted++; ++ *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size; ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->tier_conf.blocks_used += defrag->tier_conf ++ .st_last_promoted_size; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } else { ++ defrag->total_files_demoted++; ++ *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size; ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } ++ if (defrag->tier_conf.blocks_total) { ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->tier_conf.percent_full = GF_PERCENTAGE( ++ defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total); ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } ++ ++ (*total_files)++; ++} ++ ++static int ++tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid, ++ gfdb_link_info_t *link_info, gf_defrag_info_t *defrag, ++ query_cbk_args_t *query_cbk_args, dict_t *migrate_data, ++ int *per_link_status, int *total_files, ++ uint64_t *total_migrated_bytes) ++{ ++ int ret = -1; ++ struct iatt current = { ++ 0, ++ }; ++ struct iatt par_stbuf = { ++ 0, ++ }; ++ loc_t p_loc = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ xlator_t *src_subvol = NULL; ++ inode_t *linked_inode = NULL; ++ char *parent_path = NULL; ++ ++ /* Lookup for parent and get the path of parent */ ++ gf_uuid_copy(p_loc.gfid, link_info->pargfid); ++ p_loc.inode = inode_new(defrag->root_inode->table); ++ if (!p_loc.inode) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to create reference to inode" ++ " for %s", ++ uuid_utoa(p_loc.gfid)); ++ ++ *per_link_status = -1; ++ goto err; ++ } ++ ++ parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf, ++ per_link_status); ++ if (!parent_path) { ++ goto err; ++ } ++ ++ linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf); ++ inode_unref(p_loc.inode); ++ p_loc.inode = linked_inode; ++ ++ /* Preparing File Inode */ ++ gf_uuid_copy(loc.gfid, gfid); ++ loc.inode = inode_new(defrag->root_inode->table); ++ gf_uuid_copy(loc.pargfid, link_info->pargfid); ++ loc.parent = inode_ref(p_loc.inode); ++ ++ /* Get filename and Construct file path */ ++ if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc, ++ 
per_link_status) != 0) {
++        goto err;
++    }
++    gf_uuid_copy(loc.parent->gfid, link_info->pargfid);
++
++    /* lookup file inode */
++    if (tier_lookup_file(this, &p_loc, &loc, &current, per_link_status) != 0) {
++        goto err;
++    }
++
++    if (query_cbk_args->is_promotion) {
++        if (!tier_can_promote_file(this, link_info->file_name, &current,
++                                   defrag)) {
++            *per_link_status = 1;
++            goto err;
++        }
++    }
++
++    linked_inode = inode_link(loc.inode, NULL, NULL, &current);
++    inode_unref(loc.inode);
++    loc.inode = linked_inode;
++
++    /*
++     * Do not promote/demote if file already is where it
++     * should be. It means another brick moved the file
++     * so it is not an error. So we set per_link_status = 1
++     * so that we ignore counting this.
++     */
++    src_subvol = dht_subvol_get_cached(this, loc.inode);
++
++    if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf,
++                                            per_link_status)) {
++        goto err;
++    }
++
++    gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s",
++                 (query_cbk_args->is_promotion ? "promote" : "demote"),
++                 src_subvol->name, loc.path);
++
++    ret = tier_check_same_node(this, &loc, defrag);
++    if (ret != 0) {
++        if (ret < 0) {
++            *per_link_status = -1;
++            goto err;
++        }
++        ret = 0;
++        /* By setting per_link_status to 1 we are
++         * ignoring this status and will not be counting
++         * this file for migration */
++        *per_link_status = 1;
++        goto err;
++    }
++
++    gf_uuid_copy(loc.gfid, loc.inode->gfid);
++
++    if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
++        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++               "Tiering paused. "
++               "Exiting tier_migrate_link");
++        goto err;
++    }
++
++    ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc,
++                       &defrag->tier_conf);
++
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR,
++               "Failed to "
++               "migrate %s ",
++               loc.path);
++        *per_link_status = -1;
++        goto err;
++    }
++
++    tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes,
++                                   total_files);
++
++    ret = 0;
++
++err:
++    GF_FREE((char *)loc.name);
++    loc.name = NULL;
++    loc_wipe(&loc);
++    loc_wipe(&p_loc);
++
++    if ((*total_files >= defrag->tier_conf.max_migrate_files) ||
++        (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) {
++        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++               "Reached cycle migration limit. "
++               "migrated bytes %" PRId64 " files %d",
++               *total_migrated_bytes, *total_files);
++        ret = -1;
++    }
++
++    return ret;
++}
++
++static int
++tier_migrate_using_query_file(void *_args)
++{
++    int ret = -1;
++    query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args;
++    xlator_t *this = NULL;
++    gf_defrag_info_t *defrag = NULL;
++    gfdb_query_record_t *query_record = NULL;
++    gfdb_link_info_t *link_info = NULL;
++    dict_t *migrate_data = NULL;
++    /*
++     * per_file_status and per_link_status
++     * 0 : success
++     * -1 : failure
++     * 1 : ignore the status and don't count for migration
++     * */
++    int per_file_status = 0;
++    int per_link_status = 0;
++    int total_status = 0;
++    dht_conf_t *conf = NULL;
++    uint64_t total_migrated_bytes = 0;
++    int total_files = 0;
++    loc_t root_loc = {0};
++    gfdb_time_t start_time = {0};
++    gfdb_time_t current_time = {0};
++    int total_time = 0;
++    int max_time = 0;
++    gf_boolean_t emergency_demote_mode = _gf_false;
++
++    GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out);
++    GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out);
++    this = query_cbk_args->this;
++    GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out);
++    GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out);
++    GF_VALIDATE_OR_GOTO(this->name, this->private, out);
++
++    conf = this->private;
++
++    defrag = query_cbk_args->defrag;
++    migrate_data = dict_new();
++    if (!migrate_data)
++        goto out;
++
++    emergency_demote_mode = (!query_cbk_args->is_promotion &&
++                             is_hot_tier_full(&defrag->tier_conf));
++
++    if (tier_set_migrate_data(migrate_data) != 0) {
++        goto out;
++    }
++
++    dht_build_root_loc(defrag->root_inode, &root_loc);
++
++    ret = gettimeofday(&start_time, NULL);
++    if (query_cbk_args->is_promotion) {
++        max_time = defrag->tier_conf.tier_promote_frequency;
++    } else {
++        max_time = defrag->tier_conf.tier_demote_frequency;
++    }
++
++    /* Per file */
++    while ((ret = read_query_record_list(query_cbk_args->qfile_array,
++                                         &query_record)) != 0) {
++        if (ret < 0) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "Failed to fetch query record "
++                   "from query file");
++            goto out;
++        }
++
++        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
++            ret = -1;
++            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "Exiting tier migration as "
++                   "defrag status is not started");
++            goto out;
++        }
++
++        ret = gettimeofday(&current_time, NULL);
++        if (ret < 0) {
++            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "Could not get current time.");
++            goto out;
++        }
++
++        total_time = current_time.tv_sec - start_time.tv_sec;
++        if (total_time > max_time) {
++            gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++                   "Max cycle time reached. Exiting migration.");
++            goto out;
++        }
++
++        per_file_status = 0;
++        per_link_status = 0;
++
++        if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) {
++            gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++                   "Tiering paused. "
++                   "Exiting tier_migrate_using_query_file");
++            break;
++        }
++
++        if (defrag->tier_conf.mode == TIER_MODE_WM) {
++            ret = tier_get_fs_stat(this, &root_loc);
++            if (ret != 0) {
++                gfdb_methods.gfdb_query_record_free(query_record);
++                query_record = NULL;
++                gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS,
++                       "tier_get_fs_stat() FAILED ... 
" ++ "skipping file migrations until next cycle"); ++ break; ++ } ++ ++ if (!tier_do_migration(this, query_cbk_args->is_promotion)) { ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ ++ /* We have crossed the high watermark. Stop processing ++ * files if this is a promotion cycle so demotion gets ++ * a chance to start if not already running*/ ++ ++ if (query_cbk_args->is_promotion && ++ is_hot_tier_full(&defrag->tier_conf)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "High watermark crossed during " ++ "promotion. Exiting " ++ "tier_migrate_using_query_file"); ++ break; ++ } ++ continue; ++ } ++ } ++ ++ per_link_status = 0; ++ ++ /* For now we only support single link migration. And we will ++ * ignore other hard links in the link info list of query record ++ * TODO: Multiple hard links migration */ ++ if (!list_empty(&query_record->link_list)) { ++ link_info = list_first_entry(&query_record->link_list, ++ gfdb_link_info_t, list); ++ } ++ if (link_info != NULL) { ++ if (tier_migrate_link(this, conf, query_record->gfid, link_info, ++ defrag, query_cbk_args, migrate_data, ++ &per_link_status, &total_files, ++ &total_migrated_bytes) != 0) { ++ gf_msg( ++ this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "%s failed for %s(gfid:%s)", ++ (query_cbk_args->is_promotion ? "Promotion" : "Demotion"), ++ link_info->file_name, uuid_utoa(query_record->gfid)); ++ } ++ } ++ per_file_status = per_link_status; ++ ++ if (per_file_status < 0) { /* Failure */ ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->total_failures++; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } else if (per_file_status == 0) { /* Success */ ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->total_files++; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } else if (per_file_status == 1) { /* Ignore */ ++ per_file_status = 0; ++ /* Since this attempt was ignored we ++ * decrement the lookup count*/ ++ pthread_mutex_lock(&dm_stat_mutex); ++ defrag->num_files_lookedup--; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ } ++ total_status = total_status + per_file_status; ++ per_link_status = 0; ++ per_file_status = 0; ++ ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ ++ /* If we are demoting and the entry watermark was HI, then ++ * we are done with emergency demotions if the current ++ * watermark has fallen below hi-watermark level ++ */ ++ if (emergency_demote_mode) { ++ if (tier_check_watermark(this) == 0) { ++ if (!is_hot_tier_full(&defrag->tier_conf)) { ++ break; ++ } ++ } ++ } ++ } ++ ++out: ++ if (migrate_data) ++ dict_unref(migrate_data); ++ ++ gfdb_methods.gfdb_query_record_free(query_record); ++ query_record = NULL; ++ ++ return total_status; ++} ++ ++/* This is the call back function per record/file from data base */ ++static int ++tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args) ++{ ++ int ret = -1; ++ query_cbk_args_t *query_cbk_args = _args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out); ++ GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out); ++ ++ ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd, ++ gfdb_query_record); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed writing query record to query file"); ++ goto out; ++ } ++ ++ pthread_mutex_lock(&dm_stat_mutex); ++ query_cbk_args->defrag->num_files_lookedup++; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ ++ ret = 0; ++out: ++ 
return ret; ++} ++ ++/* Create query file in tier process */ ++static int ++tier_process_self_query(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ char *db_path = NULL; ++ query_cbk_args_t *query_cbk_args = NULL; ++ xlator_t *this = NULL; ++ gfdb_conn_node_t *conn_node = NULL; ++ dict_t *params_dict = NULL; ++ dict_t *ctr_ipc_dict = NULL; ++ gfdb_brick_info_t *gfdb_brick_info = args; ++ ++ /*Init of all the essentials*/ ++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); ++ query_cbk_args = gfdb_brick_info->_query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ ++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); ++ ++ db_path = local_brick->brick_db_path; ++ ++ /*Preparing DB parameters before init_db i.e getting db connection*/ ++ params_dict = dict_new(); ++ if (!params_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "DB Params cannot initialized"); ++ goto out; ++ } ++ SET_DB_PARAM_TO_DICT(this->name, params_dict, ++ (char *)gfdb_methods.get_db_path_key(), db_path, ret, ++ out); ++ ++ /*Get the db connection*/ ++ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); ++ if (!conn_node) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "FATAL: Failed initializing db operations"); ++ goto out; ++ } ++ ++ /* Query for eligible files from db */ ++ query_cbk_args->query_fd = open(local_brick->qfile_path, ++ O_WRONLY | O_CREAT | O_APPEND, ++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); ++ if (query_cbk_args->query_fd < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to open query file %s", local_brick->qfile_path); ++ goto out; ++ } ++ if (!gfdb_brick_info->_gfdb_promote) { ++ if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) { ++ /* emergency demotion mode */ ++ ret = gfdb_methods.find_all( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ query_cbk_args->defrag->tier_conf.query_limit); ++ } else { ++ if (query_cbk_args->defrag->write_freq_threshold == 0 && ++ query_cbk_args->defrag->read_freq_threshold == 0) { ++ ret = gfdb_methods.find_unchanged_for_time( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp); ++ } else { ++ ret = gfdb_methods.find_unchanged_for_time_freq( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp, ++ query_cbk_args->defrag->write_freq_threshold, ++ query_cbk_args->defrag->read_freq_threshold, _gf_false); ++ } ++ } ++ } else { ++ if (query_cbk_args->defrag->write_freq_threshold == 0 && ++ query_cbk_args->defrag->read_freq_threshold == 0) { ++ ret = gfdb_methods.find_recently_changed_files( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp); ++ } else { ++ ret = gfdb_methods.find_recently_changed_files_freq( ++ conn_node, tier_gf_query_callback, (void *)query_cbk_args, ++ gfdb_brick_info->time_stamp, ++ query_cbk_args->defrag->write_freq_threshold, ++ query_cbk_args->defrag->read_freq_threshold, _gf_false); ++ } ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "FATAL: query from db failed"); ++ goto out; ++ } ++ ++ /*Clear the heat on the DB entries*/ ++ /*Preparing ctr_ipc_dict*/ ++ 
ctr_ipc_dict = dict_new(); ++ if (!ctr_ipc_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_dict cannot initialized"); ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_CLEAR_OPS, ret, out); ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, ++ NULL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed clearing the heat " ++ "on db %s error %d", ++ local_brick->brick_db_path, ret); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (params_dict) { ++ dict_unref(params_dict); ++ params_dict = NULL; ++ } ++ ++ if (ctr_ipc_dict) { ++ dict_unref(ctr_ipc_dict); ++ ctr_ipc_dict = NULL; ++ } ++ ++ if (query_cbk_args && query_cbk_args->query_fd >= 0) { ++ sys_close(query_cbk_args->query_fd); ++ query_cbk_args->query_fd = -1; ++ } ++ gfdb_methods.fini_db(conn_node); ++ ++ return ret; ++} ++ ++/*Ask CTR to create the query file*/ ++static int ++tier_process_ctr_query(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ query_cbk_args_t *query_cbk_args = NULL; ++ xlator_t *this = NULL; ++ dict_t *ctr_ipc_in_dict = NULL; ++ dict_t *ctr_ipc_out_dict = NULL; ++ gfdb_brick_info_t *gfdb_brick_info = args; ++ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; ++ int count = 0; ++ ++ /*Init of all the essentials*/ ++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); ++ query_cbk_args = gfdb_brick_info->_query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ ++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); ++ ++ /*Preparing ctr_ipc_in_dict*/ ++ ctr_ipc_in_dict = dict_new(); ++ if (!ctr_ipc_in_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_in_dict cannot initialized"); ++ goto out; ++ } ++ ++ ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t), ++ gf_tier_mt_ipc_ctr_params_t); ++ if (!ipc_ctr_params) { ++ goto out; ++ } ++ ++ /* set all the query params*/ ++ ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; ++ ++ ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag ++ ->write_freq_threshold; ++ ++ ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag ++ ->read_freq_threshold; ++ ++ ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit; ++ ++ ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote && ++ query_cbk_args->defrag->tier_conf ++ .watermark_last == TIER_WM_HI); ++ ++ memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp, ++ sizeof(gfdb_time_t)); ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_QUERY_OPS, ret, out); ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, ++ GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path, ++ ret, out); ++ ++ ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, ++ ipc_ctr_params, sizeof(*ipc_ctr_params)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed setting %s to params dictionary", ++ GFDB_IPC_CTR_GET_QUERY_PARAMS); ++ GF_FREE(ipc_ctr_params); ++ goto out; ++ } ++ ipc_ctr_params = NULL; ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict, ++ &ctr_ipc_out_dict); ++ if (ret) { ++ gf_msg(this->name, 
GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR, ++ "Failed query on %s ret %d", local_brick->brick_db_path, ret); ++ goto out; ++ } ++ ++ ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ++ &count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed getting count " ++ "of records on %s", ++ local_brick->brick_db_path); ++ goto out; ++ } ++ ++ if (count < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed query on %s", local_brick->brick_db_path); ++ ret = -1; ++ goto out; ++ } ++ ++ pthread_mutex_lock(&dm_stat_mutex); ++ query_cbk_args->defrag->num_files_lookedup = count; ++ pthread_mutex_unlock(&dm_stat_mutex); ++ ++ ret = 0; ++out: ++ ++ if (ctr_ipc_in_dict) { ++ dict_unref(ctr_ipc_in_dict); ++ ctr_ipc_in_dict = NULL; ++ } ++ ++ if (ctr_ipc_out_dict) { ++ dict_unref(ctr_ipc_out_dict); ++ ctr_ipc_out_dict = NULL; ++ } ++ ++ GF_FREE(ipc_ctr_params); ++ ++ return ret; ++} ++ ++/* This is the call back function for each brick from hot/cold bricklist ++ * It picks up each bricks db and queries for eligible files for migration. ++ * The list of eligible files are populated in appropriate query files*/ ++static int ++tier_process_brick(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ dict_t *ctr_ipc_in_dict = NULL; ++ dict_t *ctr_ipc_out_dict = NULL; ++ char *strval = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); ++ ++ if (dht_tier_db_type == GFDB_SQLITE3) { ++ /*Preparing ctr_ipc_in_dict*/ ++ ctr_ipc_in_dict = dict_new(); ++ if (!ctr_ipc_in_dict) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_in_dict cannot initialized"); ++ goto out; ++ } ++ ++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_GET_DB_PARAM_OPS); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ GFDB_IPC_CTR_KEY); ++ goto out; ++ } ++ ++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ""); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ GFDB_IPC_CTR_GET_DB_PARAM_OPS); ++ goto out; ++ } ++ ++ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY, ++ "journal_mode"); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ GFDB_IPC_CTR_GET_DB_KEY); ++ goto out; ++ } ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ++ ctr_ipc_in_dict, &ctr_ipc_out_dict); ++ if (ret || ctr_ipc_out_dict == NULL) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to get " ++ "journal_mode of sql db %s", ++ local_brick->brick_db_path); ++ goto out; ++ } ++ ++ ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval); ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, ++ "Failed to get %s " ++ "from params dictionary" ++ "journal_mode", ++ strval); ++ goto out; ++ } ++ ++ if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) { ++ ret = tier_process_self_query(local_brick, args); ++ if (ret) { ++ goto out; ++ } ++ } else { ++ ret = tier_process_ctr_query(local_brick, args); ++ if (ret) { ++ goto out; ++ } ++ } ++ ret = 0; ++ ++ } else { ++ ret = tier_process_self_query(local_brick, args); ++ if (ret) { ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ if (ctr_ipc_in_dict) ++ dict_unref(ctr_ipc_in_dict); ++ ++ if 
(ctr_ipc_out_dict)
++        dict_unref(ctr_ipc_out_dict);
++
++    return ret;
++}
++
++static int
++tier_build_migration_qfile(migration_args_t *args,
++                           query_cbk_args_t *query_cbk_args,
++                           gf_boolean_t is_promotion)
++{
++    gfdb_time_t current_time;
++    gfdb_brick_info_t gfdb_brick_info;
++    gfdb_time_t time_in_past;
++    int ret = -1;
++    tier_brick_list_t *local_brick = NULL;
++    int i = 0;
++    time_in_past.tv_sec = args->freq_time;
++    time_in_past.tv_usec = 0;
++
++    ret = gettimeofday(&current_time, NULL);
++    if (ret == -1) {
++        gf_msg(args->this->name, GF_LOG_ERROR, errno,
++               DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
++        goto out;
++    }
++    time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
++
++    /* The migration daemon may run a varying number of usec after the */
++    /* sleep call triggers. A file may be registered in CTR some number */
++    /* of usec X after the daemon started and missed in the subsequent */
++    /* cycle if the daemon starts Y usec after the period in seconds */
++    /* where Y>X. Normalize away this problem by always setting usec */
++    /* to 0. */
++    time_in_past.tv_usec = 0;
++
++    gfdb_brick_info.time_stamp = &time_in_past;
++    gfdb_brick_info._gfdb_promote = is_promotion;
++    gfdb_brick_info._query_cbk_args = query_cbk_args;
++
++    list_for_each_entry(local_brick, args->brick_list, list)
++    {
++        /* Construct query file path for this brick
++         * i.e.
++         * /var/run/gluster/xlator_name/
++         * {promote/demote}-brickname-indexinbricklist
++         * So that no two query files will have the same path even if
++         * bricks have the same name
++         * */
++        snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d",
++                 GET_QFILE_PATH(gfdb_brick_info._gfdb_promote),
++                 local_brick->brick_name, i);
++
++        /* Delete any old query files for this brick */
++        sys_unlink(local_brick->qfile_path);
++
++        ret = tier_process_brick(local_brick, &gfdb_brick_info);
++        if (ret) {
++            gf_msg(args->this->name, GF_LOG_ERROR, 0,
++                   DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n",
++                   local_brick->brick_db_path);
++        }
++        i++;
++    }
++    ret = 0;
++out:
++    return ret;
++}
++
++static int
++tier_migrate_files_using_qfile(migration_args_t *comp,
++                               query_cbk_args_t *query_cbk_args)
++{
++    int ret = -1;
++    tier_brick_list_t *local_brick = NULL;
++    tier_brick_list_t *temp = NULL;
++    gfdb_time_t current_time = {
++        0,
++    };
++    ssize_t qfile_array_size = 0;
++    int count = 0;
++    int temp_fd = 0;
++    gf_tier_conf_t *tier_conf = NULL;
++
++    tier_conf = &(query_cbk_args->defrag->tier_conf);
++
++    /* Time for error query files */
++    gettimeofday(&current_time, NULL);
++
++    /* Build the qfile list */
++    list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
++    {
++        qfile_array_size++;
++    }
++    query_cbk_args->qfile_array = qfile_array_new(qfile_array_size);
++    if (!query_cbk_args->qfile_array) {
++        gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++               "Failed to create new "
++               "qfile_array");
++        goto out;
++    }
++
++    /*Open all qfiles*/
++    count = 0;
++    query_cbk_args->qfile_array->exhausted_count = 0;
++    list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
++    {
++        temp_fd = query_cbk_args->qfile_array->fd_array[count];
++        temp_fd = open(local_brick->qfile_path, O_RDONLY,
++                       S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
++        if (temp_fd < 0) {
++            gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR,
++                   "Failed to open "
++                   "query file %s",
++                   local_brick->qfile_path);
++            query_cbk_args->qfile_array->exhausted_count++;
++        }
++        query_cbk_args->qfile_array->fd_array[count] = temp_fd;
++        count++;
++    }
++
++    /* Moving the query file index to the next, so that we won't pick the same
++     * query file every cycle as the first one */
++    query_cbk_args->qfile_array
++        ->next_index = (query_cbk_args->is_promotion)
++                           ? tier_conf->last_promote_qfile_index
++                           : tier_conf->last_demote_qfile_index;
++    shift_next_index(query_cbk_args->qfile_array);
++    if (query_cbk_args->is_promotion) {
++        tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array
++                                                  ->next_index;
++    } else {
++        tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array
++                                                 ->next_index;
++    }
++
++    /* Migrate files using query file list */
++    ret = tier_migrate_using_query_file((void *)query_cbk_args);
++out:
++    qfile_array_free(query_cbk_args->qfile_array);
++
++    /* If there is an error rename all the query files to .err files
++     * with a timestamp for better debugging */
++    if (ret) {
++        struct tm tm = {
++            0,
++        };
++        char time_str[128] = {
++            0,
++        };
++        char query_file_path_err[PATH_MAX] = {
++            0,
++        };
++        int32_t len = 0;
++
++        /* Time format for error query files */
++        gmtime_r(&current_time.tv_sec, &tm);
++        strftime(time_str, sizeof(time_str), "%F-%T", &tm);
++
++        list_for_each_entry_safe(local_brick, temp, comp->brick_list, list)
++        {
++            /* rename error qfile*/
++            len = snprintf(query_file_path_err, sizeof(query_file_path_err),
++                           "%s-%s.err", local_brick->qfile_path, time_str);
++            if ((len >= 0) && (len < sizeof(query_file_path_err))) {
++                if (sys_rename(local_brick->qfile_path, query_file_path_err) ==
++                    -1)
++                    gf_msg_debug("tier", 0,
++                                 "rename "
++                                 "failed");
++            }
++        }
++    }
++
++    query_cbk_args->qfile_array = NULL;
++
++    return ret;
++}
++
++int
++tier_demote(migration_args_t *demotion_args)
++{
++    query_cbk_args_t query_cbk_args;
++    int ret = -1;
++
++    GF_VALIDATE_OR_GOTO("tier", demotion_args, out);
++    GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out);
++    GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list,
++                        out);
++    GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out);
++
++    THIS = demotion_args->this;
++
++    query_cbk_args.this = demotion_args->this;
++    query_cbk_args.defrag = demotion_args->defrag;
++    query_cbk_args.is_promotion = 0;
++
++    /*Build the query file using bricklist*/
++    ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false);
++    if (ret)
++        goto out;
++
++    /* Migrate files using the query file */
++    ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args);
++    if (ret)
++        goto out;
++
++out:
++    demotion_args->return_value = ret;
++    return ret;
++}
++
++int
++tier_promote(migration_args_t *promotion_args)
++{
++    int ret = -1;
++    query_cbk_args_t query_cbk_args;
++
++    GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out);
++    GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list,
++                        out);
++    GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag,
++                        out);
++
++    THIS = promotion_args->this;
++
++    query_cbk_args.this = promotion_args->this;
++    query_cbk_args.defrag = promotion_args->defrag;
++    query_cbk_args.is_promotion = 1;
++
++    /*Build the query file using bricklist*/
++    ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true);
++    if (ret)
++        goto out;
++
++    /* Migrate files using the query file */
++    ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args);
++    if (ret)
++        goto out;
++
++out:
++    promotion_args->return_value = ret;
++    return ret;
++}
++
++/*
++ * Command the CTR on a brick to compact the local database using an IPC
++ */
++static int
++tier_process_self_compact(tier_brick_list_t *local_brick, void *args) ++{ ++ int ret = -1; ++ char *db_path = NULL; ++ query_cbk_args_t *query_cbk_args = NULL; ++ xlator_t *this = NULL; ++ gfdb_conn_node_t *conn_node = NULL; ++ dict_t *params_dict = NULL; ++ dict_t *ctr_ipc_dict = NULL; ++ gfdb_brick_info_t *gfdb_brick_info = args; ++ ++ /*Init of all the essentials*/ ++ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); ++ query_cbk_args = gfdb_brick_info->_query_cbk_args; ++ ++ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); ++ this = query_cbk_args->this; ++ ++ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); ++ ++ db_path = local_brick->brick_db_path; ++ ++ /*Preparing DB parameters before init_db i.e getting db connection*/ ++ params_dict = dict_new(); ++ if (!params_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "DB Params cannot initialized"); ++ goto out; ++ } ++ SET_DB_PARAM_TO_DICT(this->name, params_dict, ++ (char *)gfdb_methods.get_db_path_key(), db_path, ret, ++ out); ++ ++ /*Get the db connection*/ ++ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); ++ if (!conn_node) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "FATAL: Failed initializing db operations"); ++ goto out; ++ } ++ ++ ret = 0; ++ ++ /*Preparing ctr_ipc_dict*/ ++ ctr_ipc_dict = dict_new(); ++ if (!ctr_ipc_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "ctr_ipc_dict cannot initialized"); ++ goto out; ++ } ++ ++ ret = dict_set_int32(ctr_ipc_dict, "compact_active", ++ query_cbk_args->defrag->tier_conf.compact_active); ++ ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ "compact_active"); ++ goto out; ++ } ++ ++ ret = dict_set_int32( ++ ctr_ipc_dict, "compact_mode_switched", ++ query_cbk_args->defrag->tier_conf.compact_mode_switched); ++ ++ if (ret) { ++ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, ++ "Failed to set %s " ++ "to params dictionary", ++ "compact_mode_switched"); ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, ++ GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out); ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Starting Compaction IPC"); ++ ++ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, ++ NULL); ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Ending Compaction IPC"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed compaction " ++ "on db %s error %d", ++ local_brick->brick_db_path, ret); ++ goto out; ++ } ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "SUCCESS: %s Compaction", local_brick->brick_name); ++ ++ ret = 0; ++out: ++ if (params_dict) { ++ dict_unref(params_dict); ++ params_dict = NULL; ++ } ++ ++ if (ctr_ipc_dict) { ++ dict_unref(ctr_ipc_dict); ++ ctr_ipc_dict = NULL; ++ } ++ ++ gfdb_methods.fini_db(conn_node); ++ ++ return ret; ++} ++ ++/* ++ * This is the call back function for each brick from hot/cold bricklist. ++ * It determines the database type on each brick and calls the corresponding ++ * function to prepare the compaction IPC. 
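++ * Note (clarifying the comment above): in its current form this helper
++ * delegates unconditionally to tier_process_self_compact(), so the
++ * compaction IPC is always prepared by the tier process itself.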
++ */
++static int
++tier_compact_db_brick(tier_brick_list_t *local_brick, void *args)
++{
++    int ret = -1;
++
++    GF_VALIDATE_OR_GOTO("tier", local_brick, out);
++
++    GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out);
++
++    ret = tier_process_self_compact(local_brick, args);
++    if (ret) {
++        gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++               "Brick %s did not compact", local_brick->brick_name);
++        goto out;
++    }
++
++    ret = 0;
++
++out:
++
++    return ret;
++}
++
++static int
++tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args)
++{
++    gfdb_time_t current_time;
++    gfdb_brick_info_t gfdb_brick_info;
++    gfdb_time_t time_in_past;
++    int ret = -1;
++    tier_brick_list_t *local_brick = NULL;
++
++    time_in_past.tv_sec = args->freq_time;
++    time_in_past.tv_usec = 0;
++
++    ret = gettimeofday(&current_time, NULL);
++    if (ret == -1) {
++        gf_msg(args->this->name, GF_LOG_ERROR, errno,
++               DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time");
++        goto out;
++    }
++    time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec;
++
++    /* The migration daemon may run a varying number of usec after the sleep
++       call triggers. A file may be registered in CTR some number of usec X
++       after the daemon started and missed in the subsequent cycle if the
++       daemon starts Y usec after the period in seconds where Y>X. Normalize
++       away this problem by always setting usec to 0. */
++    time_in_past.tv_usec = 0;
++
++    gfdb_brick_info.time_stamp = &time_in_past;
++
++    /* This is meant to say we are always compacting at this point */
++    /* We simply borrow the promotion flag to do this */
++    gfdb_brick_info._gfdb_promote = 1;
++
++    gfdb_brick_info._query_cbk_args = query_cbk_args;
++
++    list_for_each_entry(local_brick, args->brick_list, list)
++    {
++        gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++               "Start compaction for %s", local_brick->brick_name);
++
++        ret = tier_compact_db_brick(local_brick, &gfdb_brick_info);
++        if (ret) {
++            gf_msg(args->this->name, GF_LOG_ERROR, 0,
++                   DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n",
++                   local_brick->brick_db_path);
++        }
++
++        gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS,
++               "End compaction for %s", local_brick->brick_name);
++    }
++    ret = 0;
++out:
++    return ret;
++}
++
++static int
++tier_compact(void *args)
++{
++    int ret = -1;
++    query_cbk_args_t query_cbk_args;
++    migration_args_t *compaction_args = args;
++
++    GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out);
++    GF_VALIDATE_OR_GOTO(compaction_args->this->name,
++                        compaction_args->brick_list, out);
++    GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag,
++                        out);
++
++    THIS = compaction_args->this;
++
++    query_cbk_args.this = compaction_args->this;
++    query_cbk_args.defrag = compaction_args->defrag;
++    query_cbk_args.is_compaction = 1;
++
++    /* Send the compaction pragma out to all the bricks on the bricklist. */
++    /* tier_get_bricklist ensures all bricks on the list are local to */
++    /* this node. 
*/ ++ ret = tier_send_compact(compaction_args, &query_cbk_args); ++ if (ret) ++ goto out; ++ ++ ret = 0; ++out: ++ compaction_args->return_value = ret; ++ return ret; ++} ++ ++static int ++tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head) ++{ ++ xlator_list_t *child = NULL; ++ char *rv = NULL; ++ char *rh = NULL; ++ char *brickname = NULL; ++ char db_name[PATH_MAX] = ""; ++ int ret = 0; ++ tier_brick_list_t *local_brick = NULL; ++ int32_t len = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", xl, out); ++ GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out); ++ ++ /* ++ * This function obtains remote subvolumes and filters out only ++ * those running on the same node as the tier daemon. ++ */ ++ if (strcmp(xl->type, "protocol/client") == 0) { ++ ret = dict_get_str(xl->options, "remote-host", &rh); ++ if (ret < 0) ++ goto out; ++ ++ if (gf_is_local_addr(rh)) { ++ local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t), ++ gf_tier_mt_bricklist_t); ++ if (!local_brick) { ++ goto out; ++ } ++ ++ ret = dict_get_str(xl->options, "remote-subvolume", &rv); ++ if (ret < 0) ++ goto out; ++ ++ brickname = strrchr(rv, '/') + 1; ++ snprintf(db_name, sizeof(db_name), "%s.db", brickname); ++ ++ local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char); ++ if (!local_brick->brick_db_path) { ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Failed to allocate memory for" ++ " bricklist."); ++ ret = -1; ++ goto out; ++ } ++ ++ len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv, ++ GF_HIDDEN_PATH, db_name); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS, ++ "DB path too long"); ++ ret = -1; ++ goto out; ++ } ++ ++ local_brick->xlator = xl; ++ ++ snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname); ++ ++ list_add_tail(&(local_brick->list), local_bricklist_head); ++ ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ for (child = xl->children; child; child = child->next) { ++ ret = tier_get_bricklist(child->xlator, local_bricklist_head); ++ if (ret) { ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ ++ if (ret) { ++ if (local_brick) { ++ GF_FREE(local_brick->brick_db_path); ++ } ++ GF_FREE(local_brick); ++ } ++ ++ return ret; ++} ++ ++int ++tier_get_freq_demote(gf_tier_conf_t *tier_conf) ++{ ++ if ((tier_conf->mode == TIER_MODE_WM) && ++ (tier_conf->watermark_last == TIER_WM_HI)) ++ return DEFAULT_DEMOTE_DEGRADED; ++ else ++ return tier_conf->tier_demote_frequency; ++} ++ ++int ++tier_get_freq_promote(gf_tier_conf_t *tier_conf) ++{ ++ return tier_conf->tier_promote_frequency; ++} ++ ++int ++tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf) ++{ ++ return tier_conf->tier_compact_hot_frequency; ++} ++ ++int ++tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf) ++{ ++ return tier_conf->tier_compact_cold_frequency; ++} ++ ++static int ++tier_check_demote(gfdb_time_t current_time, int freq) ++{ ++ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; ++} ++ ++static gf_boolean_t ++tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, ++ int freq) ++{ ++ if ((tier_conf->mode == TIER_MODE_WM) && ++ (tier_conf->watermark_last == TIER_WM_HI)) ++ return _gf_false; ++ ++ else ++ return ((current_time.tv_sec % freq) == 0) ? 
_gf_true : _gf_false; ++} ++ ++static gf_boolean_t ++tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, ++ int freq_compact) ++{ ++ if (!(tier_conf->compact_active || tier_conf->compact_mode_switched)) ++ return _gf_false; ++ ++ return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false; ++} ++ ++void ++clear_bricklist(struct list_head *brick_list) ++{ ++ tier_brick_list_t *local_brick = NULL; ++ tier_brick_list_t *temp = NULL; ++ ++ if (list_empty(brick_list)) { ++ return; ++ } ++ ++ list_for_each_entry_safe(local_brick, temp, brick_list, list) ++ { ++ list_del(&local_brick->list); ++ GF_FREE(local_brick->brick_db_path); ++ GF_FREE(local_brick); ++ } ++} ++ ++static void ++set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold) ++{ ++ tier_brick_list_t *local_brick = NULL; ++ int i = 0; ++ ++ GF_VALIDATE_OR_GOTO("tier", brick_list, out); ++ ++ list_for_each_entry(local_brick, brick_list, list) ++ { ++ /* Construct query file path for this brick ++ * i.e ++ * /var/run/gluster/xlator_name/ ++ * {promote/demote}-brickname-indexinbricklist ++ * So that no two query files will have same path even ++ * bricks have the same name ++ * */ ++ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", ++ GET_QFILE_PATH(is_cold), local_brick->brick_name, i); ++ i++; ++ } ++out: ++ return; ++} ++ ++static int ++tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time) ++{ ++ xlator_t *this = NULL; ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ gf_tier_conf_t *tier_conf = NULL; ++ gf_boolean_t is_hot_tier = args->is_hot_tier; ++ int freq = 0; ++ int ret = -1; ++ const char *tier_type = is_hot_tier ? "hot" : "cold"; ++ ++ this = args->this; ++ ++ conf = this->private; ++ ++ defrag = conf->defrag; ++ ++ tier_conf = &defrag->tier_conf; ++ ++ freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf) ++ : tier_get_freq_compact_cold(tier_conf); ++ ++ defrag->tier_conf.compact_mode_switched = ++ is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot ++ : defrag->tier_conf.compact_mode_switched_cold; ++ ++ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Compact mode %i", defrag->tier_conf.compact_mode_switched); ++ ++ if (tier_check_compact(tier_conf, current_time, freq)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Start compaction on %s tier", tier_type); ++ ++ args->freq_time = freq; ++ ret = tier_compact(args); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Compaction failed on " ++ "%s tier", ++ tier_type); ++ goto out; ++ } ++ ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "End compaction on %s tier", tier_type); ++ ++ if (is_hot_tier) { ++ defrag->tier_conf.compact_mode_switched_hot = _gf_false; ++ } else { ++ defrag->tier_conf.compact_mode_switched_cold = _gf_false; ++ } ++ } ++ ++out: ++ return ret; ++} ++ ++static int ++tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) ++{ ++ if (mode == TIER_MODE_WM && wm == TIER_WM_HI) ++ return WM_INTERVAL_EMERG; ++ ++ return WM_INTERVAL; ++} ++ ++/* ++ * Main tiering loop. This is called from the promotion and the ++ * demotion threads spawned in tier_start(). ++ * ++ * Every second, wake from sleep to perform tasks. ++ * 1. Check trigger to migrate data. ++ * 2. Check for state changes (pause, unpause, stop). 
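++ * 3. Refresh the watermark via statfs once the check interval expires.
++ * 4. Run promotion, demotion or compaction work when the per-task
++ *    frequency check fires for the current second.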
++ */
++static void *
++tier_run(void *in_args)
++{
++    dht_conf_t *conf = NULL;
++    gfdb_time_t current_time = {0};
++    int freq = 0;
++    int ret = 0;
++    xlator_t *any = NULL;
++    xlator_t *xlator = NULL;
++    gf_tier_conf_t *tier_conf = NULL;
++    loc_t root_loc = {0};
++    int check_watermark = 0;
++    gf_defrag_info_t *defrag = NULL;
++    xlator_t *this = NULL;
++    migration_args_t *args = in_args;
++    GF_VALIDATE_OR_GOTO("tier", args, out);
++    GF_VALIDATE_OR_GOTO("tier", args->brick_list, out);
++
++    this = args->this;
++    GF_VALIDATE_OR_GOTO("tier", this, out);
++
++    conf = this->private;
++    GF_VALIDATE_OR_GOTO("tier", conf, out);
++
++    defrag = conf->defrag;
++    GF_VALIDATE_OR_GOTO("tier", defrag, out);
++
++    if (list_empty(args->brick_list)) {
++        gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR,
++               "Brick list for tier is empty. Exiting.");
++        goto out;
++    }
++
++    defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
++    tier_conf = &defrag->tier_conf;
++
++    dht_build_root_loc(defrag->root_inode, &root_loc);
++
++    while (1) {
++        /*
++         * Check if a graph switch occurred. If so, stop migration
++         * thread. It will need to be restarted manually.
++         */
++        any = THIS->ctx->active->first;
++        xlator = xlator_search_by_name(any, this->name);
++
++        if (xlator != this) {
++            gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS,
++                   "Detected graph switch. Exiting migration "
++                   "daemon.");
++            goto out;
++        }
++
++        gf_defrag_check_pause_tier(tier_conf);
++
++        sleep(1);
++
++        if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
++            ret = 1;
++            gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "defrag->defrag_status != "
++                   "GF_DEFRAG_STATUS_STARTED");
++            goto out;
++        }
++
++        if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER ||
++            defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
++            ret = 0;
++            defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
++            gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR,
++                   "defrag->defrag_cmd == "
++                   "GF_DEFRAG_CMD_START_DETACH_TIER");
++            goto out;
++        }
++
++        if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)
++            continue;
++
++        /* To have proper synchronization amongst all
++         * brick holding nodes, so that promotions and demotions
++         * start atomically w.r.t. the promotion/demotion frequency
++         * period, all nodes should have their system time
++         * in-sync with each other either manually set or
++         * using an NTP server*/
++        ret = gettimeofday(&current_time, NULL);
++        if (ret == -1) {
++            gf_msg(this->name, GF_LOG_ERROR, errno,
++                   DHT_MSG_SYS_CALL_GET_TIME_FAILED,
++                   "Failed to get current time");
++            goto out;
++        }
++
++        check_watermark++;
++
++        /* emergency demotion requires frequent watermark monitoring */
++        if (check_watermark >=
++            tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) {
++            check_watermark = 0;
++            if (tier_conf->mode == TIER_MODE_WM) {
++                ret = tier_get_fs_stat(this, &root_loc);
++                if (ret != 0) {
++                    continue;
++                }
++                ret = tier_check_watermark(this);
++                if (ret != 0) {
++                    gf_msg(this->name, GF_LOG_CRITICAL, errno,
++                           DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark");
++                    continue;
++                }
++            }
++        }
++
++        if (args->is_promotion) {
++            freq = tier_get_freq_promote(tier_conf);
++
++            if (tier_check_promote(tier_conf, current_time, freq)) {
++                args->freq_time = freq;
++                ret = tier_promote(args);
++                if (ret) {
++                    gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
++                           "Promotion failed");
++                }
++            }
++        } else if (args->is_compaction) {
++            tier_prepare_compact(args, current_time);
++        } else {
++            freq = 
tier_get_freq_demote(tier_conf); ++ ++ if (tier_check_demote(current_time, freq)) { ++ args->freq_time = freq; ++ ret = tier_demote(args); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Demotion failed"); ++ } ++ } ++ } ++ ++ /* Check the statfs immediately after the processing threads ++ return */ ++ check_watermark = WM_INTERVAL; ++ } ++ ++ ret = 0; ++out: ++ ++ args->return_value = ret; ++ ++ return NULL; ++} ++ ++int ++tier_start(xlator_t *this, gf_defrag_info_t *defrag) ++{ ++ pthread_t promote_thread; ++ pthread_t demote_thread; ++ pthread_t hot_compact_thread; ++ pthread_t cold_compact_thread; ++ int ret = -1; ++ struct list_head bricklist_hot = {0}; ++ struct list_head bricklist_cold = {0}; ++ migration_args_t promotion_args = {0}; ++ migration_args_t demotion_args = {0}; ++ migration_args_t hot_compaction_args = {0}; ++ migration_args_t cold_compaction_args = {0}; ++ dht_conf_t *conf = NULL; ++ ++ INIT_LIST_HEAD((&bricklist_hot)); ++ INIT_LIST_HEAD((&bricklist_cold)); ++ ++ conf = this->private; ++ ++ tier_get_bricklist(conf->subvolumes[1], &bricklist_hot); ++ set_brick_list_qpath(&bricklist_hot, _gf_false); ++ ++ demotion_args.this = this; ++ demotion_args.brick_list = &bricklist_hot; ++ demotion_args.defrag = defrag; ++ demotion_args.is_promotion = _gf_false; ++ demotion_args.is_compaction = _gf_false; ++ ++ ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args, ++ "tierdem"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start demotion thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto cleanup; ++ } ++ ++ tier_get_bricklist(conf->subvolumes[0], &bricklist_cold); ++ set_brick_list_qpath(&bricklist_cold, _gf_true); ++ ++ promotion_args.this = this; ++ promotion_args.brick_list = &bricklist_cold; ++ promotion_args.defrag = defrag; ++ promotion_args.is_promotion = _gf_true; ++ ++ ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args, ++ "tierpro"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start promotion thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto waitforspawned; ++ } ++ ++ hot_compaction_args.this = this; ++ hot_compaction_args.brick_list = &bricklist_hot; ++ hot_compaction_args.defrag = defrag; ++ hot_compaction_args.is_promotion = _gf_false; ++ hot_compaction_args.is_compaction = _gf_true; ++ hot_compaction_args.is_hot_tier = _gf_true; ++ ++ ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run, ++ &hot_compaction_args, "tierhcom"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start compaction thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto waitforspawnedpromote; ++ } ++ ++ cold_compaction_args.this = this; ++ cold_compaction_args.brick_list = &bricklist_cold; ++ cold_compaction_args.defrag = defrag; ++ cold_compaction_args.is_promotion = _gf_false; ++ cold_compaction_args.is_compaction = _gf_true; ++ cold_compaction_args.is_hot_tier = _gf_false; ++ ++ ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run, ++ &cold_compaction_args, "tierccom"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Failed to start compaction thread."); ++ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; ++ goto waitforspawnedhotcompact; ++ } ++ pthread_join(cold_compact_thread, NULL); ++ ++waitforspawnedhotcompact: ++ pthread_join(hot_compact_thread, NULL); ++ 
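++/* The labels below unwind in reverse spawn order: when creating one of the
++ * threads fails, control falls through and still joins every thread that
++ * was started before the failure. */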
++waitforspawnedpromote: ++ pthread_join(promote_thread, NULL); ++ ++waitforspawned: ++ pthread_join(demote_thread, NULL); ++ ++cleanup: ++ clear_bricklist(&bricklist_cold); ++ clear_bricklist(&bricklist_hot); ++ return ret; ++} ++ ++int32_t ++tier_migration_needed(xlator_t *this) ++{ ++ gf_defrag_info_t *defrag = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = 0; ++ ++ conf = this->private; ++ ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out); ++ ++ defrag = conf->defrag; ++ ++ if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || ++ (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) ++ ret = 1; ++out: ++ return ret; ++} ++ ++int32_t ++tier_migration_get_dst(xlator_t *this, dht_local_t *local) ++{ ++ dht_conf_t *conf = NULL; ++ int32_t ret = -1; ++ gf_defrag_info_t *defrag = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ ++ conf = this->private; ++ ++ defrag = conf->defrag; ++ ++ if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { ++ local->rebalance.target_node = conf->subvolumes[0]; ++ ++ } else if (conf->subvolumes[0] == local->cached_subvol) ++ local->rebalance.target_node = conf->subvolumes[1]; ++ else ++ local->rebalance.target_node = conf->subvolumes[0]; ++ ++ if (local->rebalance.target_node) ++ ret = 0; ++ ++out: ++ return ret; ++} ++ ++xlator_t * ++tier_search(xlator_t *this, dht_layout_t *layout, const char *name) ++{ ++ xlator_t *subvol = NULL; ++ dht_conf_t *conf = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ ++ conf = this->private; ++ ++ subvol = TIER_HASHED_SUBVOL; ++ ++out: ++ return subvol; ++} ++ ++static int ++tier_load_externals(xlator_t *this) ++{ ++ int ret = -1; ++ char *libpathfull = (LIBDIR "/libgfdb.so.0"); ++ get_gfdb_methods_t get_gfdb_methods; ++ ++ GF_VALIDATE_OR_GOTO("this", this, out); ++ ++ libhandle = dlopen(libpathfull, RTLD_NOW); ++ if (!libhandle) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Error loading libgfdb.so %s\n", dlerror()); ++ ret = -1; ++ goto out; ++ } ++ ++ get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods"); ++ if (!get_gfdb_methods) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Error loading get_gfdb_methods()"); ++ ret = -1; ++ goto out; ++ } ++ ++ get_gfdb_methods(&gfdb_methods); ++ ++ ret = 0; ++ ++out: ++ if (ret && libhandle) ++ dlclose(libhandle); ++ ++ return ret; ++} ++ ++static tier_mode_t ++tier_validate_mode(char *mode) ++{ ++ int ret = -1; ++ ++ if (strcmp(mode, "test") == 0) { ++ ret = TIER_MODE_TEST; ++ } else { ++ ret = TIER_MODE_WM; ++ } ++ ++ return ret; ++} ++ ++static gf_boolean_t ++tier_validate_compact_mode(char *mode) ++{ ++ gf_boolean_t ret = _gf_false; ++ ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "tier_validate_compact_mode: mode = %s", mode); ++ ++ if (!strcmp(mode, "on")) { ++ ret = _gf_true; ++ } else { ++ ret = _gf_false; ++ } ++ ++ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, ++ "tier_validate_compact_mode: ret = %i", ret); ++ ++ return ret; ++} ++ ++int ++tier_init_methods(xlator_t *this) ++{ ++ int ret = -1; ++ dht_conf_t *conf = NULL; ++ dht_methods_t *methods = NULL; ++ ++ GF_VALIDATE_OR_GOTO("tier", this, err); ++ ++ conf = this->private; ++ ++ methods = &(conf->methods); ++ ++ methods->migration_get_dst_subvol = tier_migration_get_dst; ++ methods->migration_other = tier_start; ++ methods->migration_needed = tier_migration_needed; ++ 
methods->layout_search = tier_search; ++ ++ ret = 0; ++err: ++ return ret; ++} ++ ++static void ++tier_save_vol_name(xlator_t *this) ++{ ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ char *suffix = NULL; ++ int name_len = 0; ++ ++ conf = this->private; ++ defrag = conf->defrag; ++ ++ suffix = strstr(this->name, "-tier-dht"); ++ ++ if (suffix) ++ name_len = suffix - this->name; ++ else ++ name_len = strlen(this->name); ++ ++ if (name_len > GD_VOLUME_NAME_MAX) ++ name_len = GD_VOLUME_NAME_MAX; ++ ++ strncpy(defrag->tier_conf.volname, this->name, name_len); ++ defrag->tier_conf.volname[name_len] = 0; ++} ++ ++int ++tier_init(xlator_t *this) ++{ ++ int ret = -1; ++ int freq = 0; ++ int maxsize = 0; ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ char *voldir = NULL; ++ char *mode = NULL; ++ char *paused = NULL; ++ tier_mode_t tier_mode = DEFAULT_TIER_MODE; ++ gf_boolean_t compact_mode = _gf_false; ++ ++ ret = dht_init(this); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "tier_init failed"); ++ goto out; ++ } ++ ++ conf = this->private; ++ ++ ret = tier_init_methods(this); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "tier_init_methods failed"); ++ goto out; ++ } ++ ++ if (conf->subvolume_cnt != 2) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Invalid number of subvolumes %d", conf->subvolume_cnt); ++ goto out; ++ } ++ ++ /* if instatiated from client side initialization is complete. */ ++ if (!conf->defrag) { ++ ret = 0; ++ goto out; ++ } ++ ++ /* if instatiated from server side, load db libraries */ ++ ret = tier_load_externals(this); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "Could not load externals. 
Aborting"); ++ goto out; ++ } ++ ++ defrag = conf->defrag; ++ ++ defrag->tier_conf.last_demote_qfile_index = 0; ++ defrag->tier_conf.last_promote_qfile_index = 0; ++ ++ defrag->tier_conf.is_tier = 1; ++ defrag->this = this; ++ ++ ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize); ++ if (ret) { ++ maxsize = 0; ++ } ++ ++ defrag->tier_conf.tier_max_promote_size = maxsize; ++ ++ ret = dict_get_int32(this->options, "tier-promote-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_PROMOTE_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_promote_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "tier-demote-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_DEMOTE_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_demote_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_HOT_COMPACT_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_compact_hot_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq); ++ if (ret) { ++ freq = DEFAULT_COLD_COMPACT_FREQ_SEC; ++ } ++ ++ defrag->tier_conf.tier_compact_cold_frequency = freq; ++ ++ ret = dict_get_int32(this->options, "watermark-hi", &freq); ++ if (ret) { ++ freq = DEFAULT_WM_HI; ++ } ++ ++ defrag->tier_conf.watermark_hi = freq; ++ ++ ret = dict_get_int32(this->options, "watermark-low", &freq); ++ if (ret) { ++ freq = DEFAULT_WM_LOW; ++ } ++ ++ defrag->tier_conf.watermark_low = freq; ++ ++ ret = dict_get_int32(this->options, "write-freq-threshold", &freq); ++ if (ret) { ++ freq = DEFAULT_WRITE_FREQ_SEC; ++ } ++ ++ defrag->write_freq_threshold = freq; ++ ++ ret = dict_get_int32(this->options, "read-freq-threshold", &freq); ++ if (ret) { ++ freq = DEFAULT_READ_FREQ_SEC; ++ } ++ ++ defrag->read_freq_threshold = freq; ++ ++ ret = dict_get_int32(this->options, "tier-max-mb", &freq); ++ if (ret) { ++ freq = DEFAULT_TIER_MAX_MIGRATE_MB; ++ } ++ ++ defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024; ++ ++ ret = dict_get_int32(this->options, "tier-max-files", &freq); ++ if (ret) { ++ freq = DEFAULT_TIER_MAX_MIGRATE_FILES; ++ } ++ ++ defrag->tier_conf.max_migrate_files = freq; ++ ++ ret = dict_get_int32(this->options, "tier-query-limit", ++ &(defrag->tier_conf.query_limit)); ++ if (ret) { ++ defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT; ++ } ++ ++ ret = dict_get_str(this->options, "tier-compact", &mode); ++ ++ if (ret) { ++ defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; ++ } else { ++ compact_mode = tier_validate_compact_mode(mode); ++ /* If compaction is now active, we need to inform the bricks on ++ the hot and cold tier of this. See dht-common.h for more. 
*/ ++ defrag->tier_conf.compact_active = compact_mode; ++ if (compact_mode) { ++ defrag->tier_conf.compact_mode_switched_hot = _gf_true; ++ defrag->tier_conf.compact_mode_switched_cold = _gf_true; ++ } ++ } ++ ++ ret = dict_get_str(this->options, "tier-mode", &mode); ++ if (ret) { ++ defrag->tier_conf.mode = DEFAULT_TIER_MODE; ++ } else { ++ tier_mode = tier_validate_mode(mode); ++ defrag->tier_conf.mode = tier_mode; ++ } ++ ++ pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0); ++ ++ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); ++ ++ ret = dict_get_str(this->options, "tier-pause", &paused); ++ ++ if (paused && strcmp(paused, "on") == 0) ++ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); ++ ++ ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); ++ if (ret < 0) ++ goto out; ++ ++ ret = mkdir_p(voldir, 0777, _gf_true); ++ if (ret == -1 && errno != EEXIST) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "tier_init failed"); ++ ++ GF_FREE(voldir); ++ goto out; ++ } ++ ++ GF_FREE(voldir); ++ ++ ret = gf_asprintf(&promotion_qfile, "%s/%s/promote", ++ DEFAULT_VAR_RUN_DIRECTORY, this->name); ++ if (ret < 0) ++ goto out; ++ ++ ret = gf_asprintf(&demotion_qfile, "%s/%s/demote", ++ DEFAULT_VAR_RUN_DIRECTORY, this->name); ++ if (ret < 0) { ++ GF_FREE(promotion_qfile); ++ goto out; ++ } ++ ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "Promote/demote frequency %d/%d " ++ "Write/Read freq thresholds %d/%d", ++ defrag->tier_conf.tier_promote_frequency, ++ defrag->tier_conf.tier_demote_frequency, ++ defrag->write_freq_threshold, defrag->read_freq_threshold); ++ ++ tier_save_vol_name(this); ++ ++ ret = 0; ++ ++out: ++ ++ return ret; ++} ++ ++int ++tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data) ++{ ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, ++ "Migrate file paused with op_ret %d", op_ret); ++ ++ return op_ret; ++} ++ ++int ++tier_cli_pause(void *data) ++{ ++ gf_defrag_info_t *defrag = NULL; ++ xlator_t *this = NULL; ++ dht_conf_t *conf = NULL; ++ int ret = -1; ++ ++ this = data; ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, exit); ++ ++ defrag = conf->defrag; ++ GF_VALIDATE_OR_GOTO(this->name, defrag, exit); ++ ++ gf_defrag_pause_tier(this, defrag); ++ ++ ret = 0; ++exit: ++ return ret; ++} ++ ++int ++tier_reconfigure(xlator_t *this, dict_t *options) ++{ ++ dht_conf_t *conf = NULL; ++ gf_defrag_info_t *defrag = NULL; ++ char *mode = NULL; ++ int migrate_mb = 0; ++ gf_boolean_t req_pause = _gf_false; ++ int ret = 0; ++ call_frame_t *frame = NULL; ++ gf_boolean_t last_compact_setting = _gf_false; ++ ++ conf = this->private; ++ ++ if (conf->defrag) { ++ defrag = conf->defrag; ++ GF_OPTION_RECONF("tier-max-promote-file-size", ++ defrag->tier_conf.tier_max_promote_size, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-promote-frequency", ++ defrag->tier_conf.tier_promote_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-demote-frequency", ++ defrag->tier_conf.tier_demote_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low, ++ options, int32, out); ++ ++ last_compact_setting = 
defrag->tier_conf.compact_active; ++ ++ GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active, ++ options, bool, out); ++ ++ if (last_compact_setting != defrag->tier_conf.compact_active) { ++ defrag->tier_conf.compact_mode_switched_hot = _gf_true; ++ defrag->tier_conf.compact_mode_switched_cold = _gf_true; ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, ++ "compact mode switched"); ++ } ++ ++ GF_OPTION_RECONF("tier-hot-compact-frequency", ++ defrag->tier_conf.tier_compact_hot_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-cold-compact-frequency", ++ defrag->tier_conf.tier_compact_cold_frequency, options, ++ int32, out); ++ ++ GF_OPTION_RECONF("tier-mode", mode, options, str, out); ++ defrag->tier_conf.mode = tier_validate_mode(mode); ++ ++ GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out); ++ defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 * ++ 1024; ++ ++ GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit, ++ options, int32, out); ++ ++ GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out); ++ ++ if (req_pause == _gf_true) { ++ frame = create_frame(this, this->ctx->pool); ++ if (!frame) ++ goto out; ++ ++ frame->root->pid = GF_CLIENT_PID_DEFRAG; ++ ++ ret = synctask_new(this->ctx->env, tier_cli_pause, ++ tier_cli_pause_done, frame, this); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "pause tier failed on reconfigure"); ++ } ++ } else { ++ ret = gf_defrag_resume_tier(this, defrag); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, ++ "resume tier failed on reconfigure"); ++ } ++ } ++ } ++ ++out: ++ return dht_reconfigure(this, options); ++} ++ ++void ++tier_fini(xlator_t *this) ++{ ++ if (libhandle) ++ dlclose(libhandle); ++ ++ GF_FREE(demotion_qfile); ++ GF_FREE(promotion_qfile); ++ ++ dht_fini(this); ++} ++ ++struct xlator_fops fops = { ++ ++ .lookup = dht_lookup, ++ .create = tier_create, ++ .mknod = dht_mknod, ++ ++ .open = dht_open, ++ .statfs = tier_statfs, ++ .opendir = dht_opendir, ++ .readdir = tier_readdir, ++ .readdirp = tier_readdirp, ++ .fsyncdir = dht_fsyncdir, ++ .symlink = dht_symlink, ++ .unlink = tier_unlink, ++ .link = tier_link, ++ .mkdir = dht_mkdir, ++ .rmdir = dht_rmdir, ++ .rename = dht_rename, ++ .entrylk = dht_entrylk, ++ .fentrylk = dht_fentrylk, ++ ++ /* Inode read operations */ ++ .stat = dht_stat, ++ .fstat = dht_fstat, ++ .access = dht_access, ++ .readlink = dht_readlink, ++ .getxattr = dht_getxattr, ++ .fgetxattr = dht_fgetxattr, ++ .readv = dht_readv, ++ .flush = dht_flush, ++ .fsync = dht_fsync, ++ .inodelk = dht_inodelk, ++ .finodelk = dht_finodelk, ++ .lk = dht_lk, ++ ++ /* Inode write operations */ ++ .fremovexattr = dht_fremovexattr, ++ .removexattr = dht_removexattr, ++ .setxattr = dht_setxattr, ++ .fsetxattr = dht_fsetxattr, ++ .truncate = dht_truncate, ++ .ftruncate = dht_ftruncate, ++ .writev = dht_writev, ++ .xattrop = dht_xattrop, ++ .fxattrop = dht_fxattrop, ++ .setattr = dht_setattr, ++ .fsetattr = dht_fsetattr, ++ .fallocate = dht_fallocate, ++ .discard = dht_discard, ++ .zerofill = dht_zerofill, ++}; ++ ++struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget}; ++ ++extern int32_t ++mem_acct_init(xlator_t *this); ++ ++extern struct volume_options dht_options[]; ++ ++xlator_api_t xlator_api = { ++ .init = tier_init, ++ .fini = tier_fini, ++ .notify = dht_notify, 
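++    /* init/fini/reconfigure are the tier-specific entry points; notify,
++     * memory accounting and the option table are reused from DHT. */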
++ .reconfigure = tier_reconfigure, ++ .mem_acct_init = mem_acct_init, ++ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */ ++ .fops = &fops, ++ .cbks = &cbks, ++ .options = dht_options, ++ .identifier = "tier", ++ .category = GF_MAINTAINED, ++}; ++ +diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h +new file mode 100644 +index 0000000..a20b1db +--- /dev/null ++++ b/xlators/cluster/dht/src/tier.h +@@ -0,0 +1,110 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef _TIER_H_ ++#define _TIER_H_ ++ ++/******************************************************************************/ ++/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */ ++#include "dht-common.h" ++#include <glusterfs/xlator.h> ++#include <signal.h> ++#include <fnmatch.h> ++#include <signal.h> ++ ++/* ++ * Size of timer wheel. We would not promote or demote less ++ * frequently than this number. ++ */ ++#define TIMER_SECS 3600 ++ ++#include "gfdb_data_store.h" ++#include <ctype.h> ++#include <sys/stat.h> ++ ++#define PROMOTION_QFILE "promotequeryfile" ++#define DEMOTION_QFILE "demotequeryfile" ++ ++#define TIER_HASHED_SUBVOL conf->subvolumes[0] ++#define TIER_UNHASHED_SUBVOL conf->subvolumes[1] ++ ++#define GET_QFILE_PATH(is_promotion) \ ++ (is_promotion) ? promotion_qfile : demotion_qfile ++ ++typedef struct tier_qfile_array { ++ int *fd_array; ++ ssize_t array_size; ++ ssize_t next_index; ++ /* Indicate the number of exhuasted FDs*/ ++ ssize_t exhausted_count; ++} tier_qfile_array_t; ++ ++typedef struct _query_cbk_args { ++ xlator_t *this; ++ gf_defrag_info_t *defrag; ++ /* This is write */ ++ int query_fd; ++ int is_promotion; ++ int is_compaction; ++ /* This is for read */ ++ tier_qfile_array_t *qfile_array; ++} query_cbk_args_t; ++ ++int ++gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag); ++ ++typedef struct gfdb_brick_info { ++ gfdb_time_t *time_stamp; ++ gf_boolean_t _gfdb_promote; ++ query_cbk_args_t *_query_cbk_args; ++} gfdb_brick_info_t; ++ ++typedef struct brick_list { ++ xlator_t *xlator; ++ char *brick_db_path; ++ char brick_name[NAME_MAX]; ++ char qfile_path[PATH_MAX]; ++ struct list_head list; ++} tier_brick_list_t; ++ ++typedef struct _dm_thread_args { ++ xlator_t *this; ++ gf_defrag_info_t *defrag; ++ struct list_head *brick_list; ++ int freq_time; ++ int return_value; ++ int is_promotion; ++ int is_compaction; ++ gf_boolean_t is_hot_tier; ++} migration_args_t; ++ ++typedef enum tier_watermark_op_ { ++ TIER_WM_NONE = 0, ++ TIER_WM_LOW, ++ TIER_WM_HI, ++ TIER_WM_MID ++} tier_watermark_op_t; ++ ++#define DEFAULT_PROMOTE_FREQ_SEC 120 ++#define DEFAULT_DEMOTE_FREQ_SEC 120 ++#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800 ++#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800 ++#define DEFAULT_DEMOTE_DEGRADED 1 ++#define DEFAULT_WRITE_FREQ_SEC 0 ++#define DEFAULT_READ_FREQ_SEC 0 ++#define DEFAULT_WM_LOW 75 ++#define DEFAULT_WM_HI 90 ++#define DEFAULT_TIER_MODE TIER_MODE_TEST ++#define DEFAULT_COMP_MODE _gf_true ++#define DEFAULT_TIER_MAX_MIGRATE_MB 1000 ++#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 ++#define DEFAULT_TIER_QUERY_LIMIT 100 ++ ++#endif +diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am +index 
194634b..545c02b 100644
+--- a/xlators/features/Makefile.am
++++ b/xlators/features/Makefile.am
+@@ -5,6 +5,6 @@ endif
+ SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
+	compress changelog gfid-access snapview-client snapview-server trash \
+	shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
+-	utime
++	utime changetimerecorder
+
+ CLEANFILES =
+diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am
+new file mode 100644
+index 0000000..a985f42
+--- /dev/null
++++ b/xlators/features/changetimerecorder/Makefile.am
+@@ -0,0 +1,3 @@
++SUBDIRS = src
++
++CLEANFILES =
+diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am
+new file mode 100644
+index 0000000..620017e
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/Makefile.am
+@@ -0,0 +1,26 @@
++xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
++
++# changetimerecorder can only be built when libgfdb is enabled
++if BUILD_GFDB
++  xlator_LTLIBRARIES = changetimerecorder.la
++endif
++
++changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
++
++changetimerecorder_la_SOURCES = changetimerecorder.c \
++	ctr-helper.c ctr-xlator-ctx.c
++
++changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
++	$(top_builddir)/libglusterfs/src/gfdb/libgfdb.la
++
++noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \
++	ctr-helper.h ctr-xlator-ctx.h
++
++AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
++	-I$(top_srcdir)/libglusterfs/src/gfdb \
++	-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
++	-DDATADIR=\"$(localstatedir)\"
++
++AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS)
++
++CLEANFILES =
+diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
+new file mode 100644
+index 0000000..f2aa4a9
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
+@@ -0,0 +1,2371 @@
++/*
++   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++   This file is part of GlusterFS.
++
++   This file is licensed to you under your choice of the GNU Lesser
++   General Public License, version 3 or any later version (LGPLv3 or
++   later), or the GNU General Public License, version 2 (GPLv2), in all
++   cases as published by the Free Software Foundation.
++*/
++#include <ctype.h>
++#include <sys/uio.h>
++
++#include "gfdb_sqlite3.h"
++#include "ctr-helper.h"
++#include "ctr-messages.h"
++#include <glusterfs/syscall.h>
++
++#include "changetimerecorder.h"
++#include "tier-ctr-interface.h"
++
++/*******************************inode forget***********************************/
++int
++ctr_forget(xlator_t *this, inode_t *inode)
++{
++    fini_ctr_xlator_ctx(this, inode);
++    return 0;
++}
++
++/************************** Look up heal **************************************/
++/*
++Problem: The CTR xlator records file metadata (heat/hardlinks)
++into the database. This works fine for files which are created
++after the ctr xlator is switched ON. But for files which were
++created before the CTR xlator was ON, the CTR xlator is not able to
++record either piece of metadata, i.e. heat or hardlinks, thus making
++those files immune to promotions/demotions.
++
++Solution: The solution implemented in this patch is to do a
++ctr-db heal of all those pre-existing files, using named lookup.
++For this purpose we use the inode-xlator context variable option
++in gluster.
++The inode-xlator context variable for the ctr xlator will have the
++following:
++ a. A lock for the context variable
++ b. A hardlink list: this list represents the successfully looked-up
++    hardlinks.
++These are the scenarios in which the hardlink list is updated:
++1) Named-Lookup: Whenever a named lookup happens on a file, in the
++   wind path we copy all required hardlink and inode information into
++   the ctr_db_record structure, which resides in the frame->local
++   variable. We don't update the database in the wind. During the
++   unwind, we read the information back from the ctr_db_record and:
++   Check if the inode context variable is created; if not, we create it.
++   Check if the hard link is present in the hardlink list.
++   If it is not there, we add it to the list and send an update to the
++   database using libgfdb.
++   Please note: the database transaction can fail (and we ignore the
++   failure) as there might already be a record in the db. This update
++   to the db is only to heal the record if it is missing.
++   If it is already in the list, we ignore it.
++2) Inode Forget: Whenever an inode forget hits, we clear the hardlink list
++   in the inode context variable and delete the inode context variable.
++   Please note: an inode forget may happen for two reasons:
++   a. when the inode is deleted.
++   b. the in-memory inode is evicted from the inode table due to cache
++      limits.
++3) create: whenever a create happens, we create the inode context variable
++   and add the hardlink. The database update is done as usual by ctr.
++4) link: whenever a hardlink is created for the inode, we create the inode
++   context variable, if not present, and add the hardlink to the list.
++5) unlink: whenever an unlink happens, we delete the hardlink from the list.
++6) mknod: same as create.
++7) rename: whenever a rename happens, we update the hardlink in the list;
++   if the hardlink was not present, we add it to the list.
++
++What is pending:
++1) This solution will only work for named lookups.
++2) We don't track afr-self-heal/dht-rebalancer traffic for healing.
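++
++Illustrative flow of a single heal via named lookup (a sketch of the
++functions defined below, not additional logic):
++
++   ctr_lookup()        wind:   ctr_lookup_wind() fills the ctr_db_record
++                               in frame->local; nothing is written to the
++                               db at this point.
++   ctr_lookup_cbk()    unwind: add_hard_link_ctx() checks/updates the
++                               hardlink list in the inode context; if the
++                               inode or the hardlink needs healing,
++                               ctr_lookup_unwind() calls insert_record()
++                               to write the record, ignoring "record
++                               already exists" failures.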
++
++*/
++
++/* This function does not write anything to the db;
++ * it just creates the local variable
++ * for the frame and sets values in the ctr_db_record */
++static int
++ctr_lookup_wind(call_frame_t *frame, xlator_t *this,
++                gf_ctr_inode_context_t *ctr_inode_cx)
++{
++    int ret = -1;
++    gf_ctr_private_t *_priv = NULL;
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(frame->root);
++    GF_ASSERT(this);
++    IS_CTR_INODE_CX_SANE(ctr_inode_cx);
++
++    _priv = this->private;
++    GF_ASSERT(_priv);
++
++    if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
++        frame->local = init_ctr_local_t(this);
++        if (!frame->local) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++                   "WIND: Error while creating ctr local");
++            goto out;
++        }
++        ctr_local = frame->local;
++        /* Definitely no internal fops will reach here */
++        ctr_local->is_internal_fop = _gf_false;
++        /* Don't record counters */
++        CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
++        /* Don't record time at all */
++        CTR_DB_REC(ctr_local).do_record_times = _gf_false;
++
++        /* Copy gfid into db record */
++        gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid));
++
++        /* Set fop_path and fop_type, required by libgfdb to make
++         * a decision while inserting the record */
++        CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path;
++        CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type;
++
++        /* Copy hard link info */
++        gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid,
++                     *((NEW_LINK_CX(ctr_inode_cx))->pargfid));
++        if (snprintf(CTR_DB_REC(ctr_local).file_name,
++                     sizeof(CTR_DB_REC(ctr_local).file_name), "%s",
++                     NEW_LINK_CX(ctr_inode_cx)->basename) >=
++            sizeof(CTR_DB_REC(ctr_local).file_name)) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++                   "WIND: Error copying filename of ctr local");
++            goto out;
++        }
++        /* Since we are in lookup, we can ignore errors while
++         * inserting in the DB, because there may be many
++         * attempts to write to the DB for healing.
++         * We don't want to log all failed attempts and
++         * bloat the log */
++        ctr_local->gfdb_db_record.ignore_errors = _gf_true;
++    }
++
++    ret = 0;
++
++out:
++
++    if (ret) {
++        free_ctr_local(ctr_local);
++        frame->local = NULL;
++    }
++
++    return ret;
++}
++
++/* This function inserts the ctr_db_record populated by ctr_lookup_wind
++ * into the db.
It also destroys the frame->local created by ctr_lookup_wind */ ++static int ++ctr_lookup_unwind(call_frame_t *frame, xlator_t *this) ++{ ++ int ret = -1; ++ gf_ctr_private_t *_priv = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(this); ++ ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ ++ GF_ASSERT(_priv->_db_conn); ++ ++ ctr_local = frame->local; ++ ++ if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) { ++ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record); ++ if (ret == -1) { ++ gf_msg(this->name, ++ _gfdb_log_level(GF_LOG_ERROR, ++ ctr_local->gfdb_db_record.ignore_errors), ++ 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, ++ "UNWIND: Error filling ctr local"); ++ goto out; ++ } ++ } ++ ret = 0; ++out: ++ free_ctr_local(ctr_local); ++ frame->local = NULL; ++ return ret; ++} ++ ++/****************************************************************************** ++ * ++ * FOPS HANDLING BELOW ++ * ++ * ***************************************************************************/ ++ ++/****************************LOOKUP********************************************/ ++ ++int32_t ++ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *dict, struct iatt *postparent) ++{ ++ int ret = -1; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ gf_ctr_local_t *ctr_local = NULL; ++ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; ++ gf_boolean_t _is_heal_needed = _gf_false; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /* if the lookup failed lookup don't do anything*/ ++ if (op_ret == -1) { ++ gf_msg_trace(this->name, 0, "lookup failed with %s", ++ strerror(op_errno)); ++ goto out; ++ } ++ ++ /* Ignore directory lookups */ ++ if (inode->ia_type == IA_IFDIR) { ++ goto out; ++ } ++ ++ /* if frame local was not set by the ctr_lookup() ++ * so don't so anything*/ ++ if (!frame->local) { ++ goto out; ++ } ++ ++ /* if the lookup is for dht link donot record*/ ++ if (dht_is_linkfile(buf, dict)) { ++ gf_msg_trace(this->name, 0, ++ "Ignoring Lookup " ++ "for dht link file"); ++ goto out; ++ } ++ ++ ctr_local = frame->local; ++ /*Assign the proper inode type*/ ++ ctr_local->ia_inode_type = inode->ia_type; ++ ++ /* Copy gfid directly from inode */ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid); ++ ++ /* Checking if gfid and parent gfid is valid */ ++ if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) || ++ gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) { ++ gf_msg_trace(this->name, 0, "Invalid GFID"); ++ goto out; ++ } ++ ++ /* if its a first entry ++ * then mark the ctr_record for create ++ * A create will attempt a file and a hard link created in the db*/ ++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode); ++ if (!ctr_xlator_ctx) { ++ /* This marks inode heal */ ++ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; ++ _is_heal_needed = _gf_true; ++ } ++ ++ /* Copy the correct gfid from resolved inode */ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid); ++ ++ /* Add hard link to the list */ ++ ret_val = add_hard_link_ctx(frame, this, inode); ++ if (ret_val == CTR_CTX_ERROR) { ++ gf_msg_trace(this->name, 0, "Failed adding hardlink to list"); ++ goto out; ++ } ++ /* If inode needs healing then heal the hardlink also */ ++ else if (ret_val & CTR_TRY_INODE_HEAL) { ++ /* This marks inode heal */ ++ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; ++ _is_heal_needed = _gf_true; ++ } ++ /* If hardlink needs healing */ ++ else if (ret_val & 
CTR_TRY_HARDLINK_HEAL) { ++ _is_heal_needed = _gf_true; ++ } ++ ++ /* If lookup heal needed */ ++ if (!_is_heal_needed) ++ goto out; ++ ++ /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind ++ * in to the db. It also destroys the frame->local ++ * created by ctr_lookup_wind */ ++ ret = ctr_lookup_unwind(frame, this); ++ if (ret) { ++ gf_msg_trace(this->name, 0, "Failed healing/inserting link"); ++ } ++ ++out: ++ free_ctr_local((gf_ctr_local_t *)frame->local); ++ frame->local = NULL; ++ ++ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict, ++ postparent); ++ ++ return 0; ++} ++ ++int32_t ++ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) ++{ ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /* Don't handle nameless lookups*/ ++ if (!loc->parent || !loc->name) ++ goto out; ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out); ++ ++ /* Fill ctr inode context*/ ++ /* IA_IFREG : We assume its a file in the wind ++ * but in the unwind we are sure what the inode is a file ++ * or directory ++ * gfid: we are just filling loc->gfid which is not correct. ++ * In unwind we fill the correct gfid for successful lookup*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL, ++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); ++ ++ /* Create the frame->local and populate ctr_db_record ++ * No writing to the db yet */ ++ ret = ctr_lookup_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED, ++ "Failed to insert link wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xdata); ++ return 0; ++} ++ ++/****************************WRITEV********************************************/ ++int32_t ++ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, ++ "Failed to insert writev unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, ++ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED, ++ "Failed to 
insert writev wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, ++ iobref, xdata); ++ ++ return 0; ++} ++ ++/******************************setattr*****************************************/ ++ ++int32_t ++ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf, ++ struct iatt *postop_stbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, ++ "Failed to insert setattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf, ++ postop_stbuf, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, ++ int32_t valid, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert setattr wind"); ++ } ++out: ++ ++ STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); ++ ++ return 0; ++} ++ ++/*************************** fsetattr ***************************************/ ++int32_t ++ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf, ++ struct iatt *postop_stbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, ++ "Failed to insert fsetattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf, ++ postop_stbuf, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, ++ int32_t valid, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert fsetattr wind"); ++ } ++out: ++ STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); ++ ++ return 0; ++} ++/****************************fremovexattr************************************/ ++ ++int32_t ++ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED, ++ "Failed to insert fremovexattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED, ++ "Failed to insert fremovexattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); ++ return 0; ++} ++ ++/****************************removexattr*************************************/ ++ ++int32_t ++ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED, ++ "Failed to insert removexattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED, ++ "Failed to insert removexattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); ++ return 0; ++} ++ ++/****************************truncate****************************************/ ++ ++int32_t ++ctr_truncate_cbk(call_frame_t *frame, void 
*cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED, ++ "Failed to insert truncate unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, ++ "Failed to insert truncate wind"); ++ } ++out: ++ STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); ++ return 0; ++} ++ ++/****************************ftruncate***************************************/ ++ ++int32_t ++ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED, ++ "Failed to insert ftruncate unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, ++ "Failed to insert ftruncate wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++} ++ ++/****************************rename******************************************/ ++int32_t ++ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ uint32_t remaining_links = -1; ++ gf_ctr_local_t *ctr_local = NULL; ++ gfdb_fop_type_t fop_type = 
GFDB_FOP_INVALID_OP; ++ gfdb_fop_path_t fop_path = GFDB_FOP_INVALID; ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(this); ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED, ++ "Failed to insert rename unwind"); ++ goto out; ++ } ++ ++ if (!xdata) ++ goto out; ++ /* ++ * ++ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator ++ * This is only set when we are overwriting hardlinks. ++ * ++ * */ ++ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, ++ &remaining_links); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); ++ remaining_links = -1; ++ goto out; ++ } ++ ++ ctr_local = frame->local; ++ if (!ctr_local) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL, ++ "ctr_local is NULL."); ++ goto out; ++ } ++ ++ /* This is not the only link */ ++ if (remaining_links > 1) { ++ fop_type = GFDB_FOP_DENTRY_WRITE; ++ fop_path = GFDB_FOP_UNDEL; ++ } ++ /* Last link that was deleted */ ++ else if (remaining_links == 1) { ++ fop_type = GFDB_FOP_DENTRY_WRITE; ++ fop_path = GFDB_FOP_UNDEL_ALL; ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED, ++ "Invalid link count from posix"); ++ goto out; ++ } ++ ++ ret = ctr_delete_hard_link_from_db( ++ this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid, ++ CTR_DB_REC(ctr_local).file_name, fop_type, fop_path); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to delete records of %s", ++ CTR_DB_REC(ctr_local).old_file_name); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, ++ postoldparent, prenewparent, postnewparent, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t new_link_cx, old_link_cx; ++ gf_ctr_link_context_t *_nlink_cx = &new_link_cx; ++ gf_ctr_link_context_t *_olink_cx = &old_link_cx; ++ int is_dict_created = 0; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill old link context*/ ++ FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, out); ++ ++ /*Fill new link context*/ ++ FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, ++ oldloc->inode->gfid, _nlink_cx, _olink_cx, ++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); ++ ++ /* If the rename is a overwrite of hardlink ++ * rename ("file1", "file2") ++ * file1 is hardlink for gfid say 00000000-0000-0000-0000-00000000000A ++ * file2 is hardlink for gfid say 00000000-0000-0000-0000-00000000000B ++ * so we are saving file2 gfid in old_gfid so that we delete entries ++ * from the db during rename callback if the fop is successful ++ * */ ++ if (newloc->inode) { ++ /* This is the GFID from where the newloc hardlink will be ++ * unlinked */ ++ _inode_cx->old_gfid = &newloc->inode->gfid; ++ } ++ ++ /* Is a metatdata fop */ ++ _inode_cx->is_metadata_fop = 
_gf_true; ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED, ++ "Failed to insert rename wind"); ++ } else { ++ /* We are doing updation of hard link in inode context in wind ++ * As we don't get the "inode" in the call back for rename */ ++ ret = update_hard_link_ctx(frame, this, oldloc->inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED, ++ "Failed " ++ "updating hard link in ctr inode context"); ++ goto out; ++ } ++ ++ /* If the newloc has an inode. i.e acquiring hardlink of an ++ * exisitng file i.e overwritting a file. ++ * */ ++ if (newloc->inode) { ++ /* Getting the ctr inode context variable for ++ * inode whose hardlink will be acquired during ++ * the rename ++ * */ ++ ctr_xlator_ctx = get_ctr_xlator_ctx(this, newloc->inode); ++ if (!ctr_xlator_ctx) { ++ /* Since there is no ctr inode context ++ * so nothing more to do */ ++ ret = 0; ++ goto out; ++ } ++ ++ /* Deleting hardlink from context variable */ ++ ret = ctr_delete_hard_link(this, ctr_xlator_ctx, newloc->pargfid, ++ newloc->name); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed to delete hard link"); ++ goto out; ++ } ++ ++ /* Requesting for number of hardlinks on the newloc ++ * inode from POSIX. ++ * */ ++ is_dict_created = set_posix_link_request(this, &xdata); ++ if (is_dict_created == -1) { ++ ret = -1; ++ goto out; ++ } ++ } ++ } ++ ++out: ++ STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ ++ if (is_dict_created == 1) { ++ dict_unref(xdata); ++ } ++ ++ return 0; ++} ++ ++/****************************unlink******************************************/ ++int32_t ++ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ uint32_t remaining_links = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ if (!xdata) ++ goto out; ++ ++ /* ++ * ++ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator ++ * ++ * */ ++ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, ++ &remaining_links); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); ++ remaining_links = -1; ++ } ++ ++ /*This is not the only link*/ ++ if (remaining_links != 1) { ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNDEL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to insert unlink unwind"); ++ } ++ } ++ /*Last link that was deleted*/ ++ else if (remaining_links == 1) { ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNDEL_ALL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to insert unlink unwind"); ++ } ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, ++ xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t 
ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ gf_boolean_t is_xdata_created = _gf_false; ++ struct iatt dummy_stat = {0}; ++ ++ GF_ASSERT(frame); ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ _link_cx, NULL, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_WDEL); ++ ++ /*Internal FOP*/ ++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); ++ ++ /* Is a metadata FOP */ ++ _inode_cx->is_metadata_fop = _gf_true; ++ ++ /* If its a internal FOP and dht link file donot record*/ ++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { ++ goto out; ++ } ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, ++ "Failed to insert unlink wind"); ++ } else { ++ /* We are doing delete of hard link in inode context in wind ++ * As we don't get the "inode" in the call back for rename */ ++ ret = delete_hard_link_ctx(frame, this, loc->inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed " ++ "deleting hard link from ctr inode context"); ++ } ++ } ++ ++ /* ++ * ++ * Sending GF_REQUEST_LINK_COUNT_XDATA ++ * to POSIX Xlator to send link count in unwind path ++ * ++ * */ ++ /*create xdata if NULL*/ ++ if (!xdata) { ++ xdata = dict_new(); ++ is_xdata_created = (xdata) ? _gf_true : _gf_false; ++ } ++ if (!xdata) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL, ++ "xdata is NULL :Cannot send " ++ "GF_REQUEST_LINK_COUNT_XDATA to posix"); ++ goto out; ++ } ++ ++ ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, ++ "Failed setting GF_REQUEST_LINK_COUNT_XDATA"); ++ if (is_xdata_created) { ++ dict_unref(xdata); ++ } ++ goto out; ++ } ++ ++out: ++ STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ ++ if (is_xdata_created) ++ dict_unref(xdata); ++ ++ return 0; ++} ++ ++/****************************fsync******************************************/ ++int32_t ++ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, ++ "Failed to insert fsync unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, 
_inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED, ++ "Failed to insert fsync wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); ++ return 0; ++} ++ ++/****************************setxattr****************************************/ ++ ++int ++ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, ++ "Failed to insert setxattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int ++ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, ++ int flags, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, ++ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert setxattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata); ++ return 0; ++} ++/**************************** fsetxattr *************************************/ ++int32_t ++ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, ++ "Failed to insert fsetxattr unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); ++ ++ return 0; ++} ++ ++int32_t ++ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, ++ int32_t flags, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, ++ "Failed to insert fsetxattr wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); ++ return 0; ++} ++/****************************mknod*******************************************/ ++ ++int32_t 
++ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ /* Add hard link to the list */ ++ ret_val = add_hard_link_ctx(frame, this, inode); ++ if (ret_val == CTR_CTX_ERROR) { ++ gf_msg_trace(this->name, 0, "Failed adding hard link"); ++ } ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED, ++ "Failed to insert mknod unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ ++ return 0; ++} ++ ++int ++ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ uuid_t gfid = { ++ 0, ++ }; ++ uuid_t *ptr_gfid = &gfid; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /*get gfid from xdata dict*/ ++ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); ++ if (ret) { ++ gf_msg_debug(this->name, 0, "failed to get gfid from dict"); ++ goto out; ++ } ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx, ++ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED, ++ "Failed to insert mknod wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++} ++ ++/****************************create******************************************/ ++int ++ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = add_hard_link_ctx(frame, this, inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED, ++ "Failed adding hard link"); ++ } ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, ++ "Failed to insert create unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ ++ return 0; ++} ++ ++int ++ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ 
gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ uuid_t gfid = { ++ 0, ++ }; ++ uuid_t *ptr_gfid = &gfid; ++ struct iatt dummy_stat = {0}; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /*Get GFID from Xdata dict*/ ++ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED, ++ "failed to get gfid from dict"); ++ goto out; ++ } ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx, ++ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); ++ ++ /*Internal FOP*/ ++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); ++ ++ /* If its a internal FOP and dht link file donot record*/ ++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { ++ goto out; ++ } ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, &ctr_inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED, ++ "Failed to insert create wind"); ++ } ++out: ++ STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++} ++ ++/****************************link********************************************/ ++ ++int ++ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, inode_t *inode, struct iatt *stbuf, ++ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ /* Add hard link to the list */ ++ ret = add_hard_link_ctx(frame, this, inode); ++ if (ret) { ++ gf_msg_trace(this->name, 0, "Failed adding hard link"); ++ } ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, ++ GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, ++ "Failed to insert create unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent, ++ postparent, xdata); ++ return 0; ++} ++ ++int ++ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, ++ dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ gf_ctr_link_context_t ctr_link_cx; ++ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; ++ struct iatt dummy_stat = {0}; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ ++ GF_ASSERT(frame); ++ GF_ASSERT(frame->root); ++ ++ /*fill ctr link context*/ ++ FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, ++ oldloc->inode->gfid, _link_cx, NULL, ++ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); ++ ++ /*Internal FOP*/ ++ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); ++ ++ /* Is a metadata fop */ ++ _inode_cx->is_metadata_fop = _gf_true; ++ ++ /* If its a internal FOP and dht link file donot record*/ ++ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { ++ goto out; ++ } ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED, ++ "Failed to 
insert link wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); ++ return 0; ++} ++ ++/******************************readv*****************************************/ ++int ++ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, ++ int op_errno, struct iovec *vector, int count, struct iatt *stbuf, ++ struct iobref *iobref, dict_t *xdata) ++{ ++ int ret = -1; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); ++ ++ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, ++ "Failed to insert create unwind"); ++ } ++ ++out: ++ ctr_free_frame_local(frame); ++ ++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf, ++ iobref, xdata); ++ return 0; ++} ++ ++int ++ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, ++ uint32_t flags, dict_t *xdata) ++{ ++ int ret = -1; ++ gf_ctr_inode_context_t ctr_inode_cx; ++ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; ++ ++ CTR_IS_DISABLED_THEN_GOTO(this, out); ++ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); ++ ++ /*Fill ctr inode context*/ ++ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, ++ NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND); ++ ++ /*record into the database*/ ++ ret = ctr_insert_wind(frame, this, _inode_cx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED, ++ "Failed to insert readv wind"); ++ } ++ ++out: ++ STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata); ++ return 0; ++} ++ ++/*******************************ctr_ipc****************************************/ ++ ++/*This is the call back function per record/file from data base*/ ++static int ++ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args) ++{ ++ int ret = -1; ++ ctr_query_cbk_args_t *query_cbk_args = args; ++ ++ GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out); ++ ++ ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record); ++ if (ret) { ++ gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "Failed to write to query file"); ++ goto out; ++ } ++ ++ query_cbk_args->count++; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++/* This function does all the db queries related to tiering and ++ * generates/populates new/existing query file ++ * inputs: ++ * xlator_t *this : CTR Translator ++ * void *conn_node : Database connection ++ * char *query_file: the query file that needs to be updated ++ * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters ++ * Return: ++ * On success 0 ++ * On failure -1 ++ * */ ++int ++ctr_db_query(xlator_t *this, void *conn_node, char *query_file, ++ gfdb_ipc_ctr_params_t *ipc_ctr_params) ++{ ++ int ret = -1; ++ ctr_query_cbk_args_t query_cbk_args = {0}; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, conn_node, out); ++ GF_VALIDATE_OR_GOTO(this->name, query_file, out); ++ GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out); ++ ++ /*Query for eligible files from db*/ ++ query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND, ++ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); ++ if (query_cbk_args.query_fd < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR, ++ "Failed to open query file %s", query_file); ++ goto 
out; ++ } ++ if (!ipc_ctr_params->is_promote) { ++ if (ipc_ctr_params->emergency_demote) { ++ /* emergency demotion mode */ ++ ret = find_all(conn_node, ctr_db_query_callback, ++ (void *)&query_cbk_args, ++ ipc_ctr_params->query_limit); ++ } else { ++ if (ipc_ctr_params->write_freq_threshold == 0 && ++ ipc_ctr_params->read_freq_threshold == 0) { ++ ret = find_unchanged_for_time(conn_node, ctr_db_query_callback, ++ (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp); ++ } else { ++ ret = find_unchanged_for_time_freq( ++ conn_node, ctr_db_query_callback, (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp, ++ ipc_ctr_params->write_freq_threshold, ++ ipc_ctr_params->read_freq_threshold, _gf_false); ++ } ++ } ++ } else { ++ if (ipc_ctr_params->write_freq_threshold == 0 && ++ ipc_ctr_params->read_freq_threshold == 0) { ++ ret = find_recently_changed_files(conn_node, ctr_db_query_callback, ++ (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp); ++ } else { ++ ret = find_recently_changed_files_freq( ++ conn_node, ctr_db_query_callback, (void *)&query_cbk_args, ++ &ipc_ctr_params->time_stamp, ++ ipc_ctr_params->write_freq_threshold, ++ ipc_ctr_params->read_freq_threshold, _gf_false); ++ } ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: query from db failed"); ++ goto out; ++ } ++ ++ ret = clear_files_heat(conn_node); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed to clear db entries"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ ++ if (!ret) ++ ret = query_cbk_args.count; ++ ++ if (query_cbk_args.query_fd >= 0) { ++ sys_close(query_cbk_args.query_fd); ++ query_cbk_args.query_fd = -1; ++ } ++ ++ return ret; ++} ++ ++void * ++ctr_compact_thread(void *args) ++{ ++ int ret = -1; ++ void *db_conn = NULL; ++ ++ xlator_t *this = NULL; ++ gf_ctr_private_t *priv = NULL; ++ gf_boolean_t compact_active = _gf_false; ++ gf_boolean_t compact_mode_switched = _gf_false; ++ ++ this = (xlator_t *)args; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ ++ priv = this->private; ++ ++ db_conn = priv->_db_conn; ++ compact_active = priv->compact_active; ++ compact_mode_switched = priv->compact_mode_switched; ++ ++ gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction"); ++ ++ ret = compact_db(db_conn, compact_active, compact_mode_switched); ++ ++ if (ret) { ++ gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to perform the compaction"); ++ } ++ ++ ret = pthread_mutex_lock(&priv->compact_lock); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to acquire lock"); ++ goto out; ++ } ++ ++ /* We are done compaction on this brick. 
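++       Clearing compact_active and compact_mode_switched lets the next
++       GFDB_IPC_CTR_SET_COMPACT_PRAGMA request start a fresh compaction.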
Set all flags to false */ ++ priv->compact_active = _gf_false; ++ priv->compact_mode_switched = _gf_false; ++ ++ ret = pthread_mutex_unlock(&priv->compact_lock); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to release lock"); ++ goto out; ++ } ++ ++out: ++ return NULL; ++} ++ ++int ++ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict) ++{ ++ int ret = -1; ++ char *ctr_ipc_ops = NULL; ++ gf_ctr_private_t *priv = NULL; ++ char *db_version = NULL; ++ char *db_param_key = NULL; ++ char *db_param = NULL; ++ char *query_file = NULL; ++ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; ++ int result = 0; ++ pthread_t compact_thread; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, this->private, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out); ++ GF_VALIDATE_OR_GOTO(this->name, in_dict, out); ++ GF_VALIDATE_OR_GOTO(this->name, out_dict, out); ++ ++ GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops, ++ out); ++ ++ /*if its a db clear operation */ ++ if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS, ++ SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) { ++ ret = clear_files_heat(priv->_db_conn); ++ if (ret) ++ goto out; ++ ++ } /* if its a query operation, in which case its query + clear db*/ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS, ++ SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) { ++ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting query file path"); ++ goto out; ++ } ++ ++ ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, ++ (void *)&ipc_ctr_params); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting query parameters"); ++ goto out; ++ } ++ ++ ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params); ++ ++ ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed setting query reply"); ++ goto out; ++ } ++ ++ } /* if its a query for db version */ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS, ++ SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) { ++ ret = get_db_version(priv->_db_conn, &db_version); ++ if (ret == -1 || !db_version) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting db version "); ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION, ++ db_version, ret, error); ++ ++ } /* if its a query for a db setting */ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ++ SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) { ++ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed extracting db param key"); ++ goto out; ++ } ++ ++ ret = get_db_params(priv->_db_conn, db_param_key, &db_param); ++ if (ret == -1 || !db_param) { ++ goto out; ++ } ++ ++ SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret, ++ error); ++ } /* if its an attempt to compact the database */ ++ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ++ SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) { ++ ret = pthread_mutex_lock(&priv->compact_lock); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to acquire lock for compaction"); ++ goto out; ++ } ++ ++ if ((priv->compact_active || priv->compact_mode_switched)) { ++ /* 
Compaction in progress. LEAVE */ ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Compaction already in progress."); ++ pthread_mutex_unlock(&priv->compact_lock); ++ goto out; ++ } ++ /* At this point, we should be the only one on the brick */ ++ /* compacting */ ++ ++ /* Grab the arguments from the dictionary */ ++ ret = dict_get_int32(in_dict, "compact_active", &result); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to get compaction type"); ++ goto out; ++ } ++ ++ if (result) { ++ priv->compact_active = _gf_true; ++ } ++ ++ ret = dict_get_int32(in_dict, "compact_mode_switched", &result); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to see if compaction switched"); ++ goto out; ++ } ++ ++ if (result) { ++ priv->compact_mode_switched = _gf_true; ++ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, ++ "Pre-thread: Compact mode switch is true"); ++ } else { ++ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, ++ "Pre-thread: Compact mode switch is false"); ++ } ++ ++ ret = pthread_mutex_unlock(&priv->compact_lock); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to release lock for compaction"); ++ goto out; ++ } ++ ++ ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread, ++ (void *)this, "ctrcomp"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed to spawn compaction thread"); ++ goto out; ++ } ++ ++ goto out; ++ } /* default case */ ++ else { ++ goto out; ++ } ++ ++ ret = 0; ++ goto out; ++error: ++ GF_FREE(db_param_key); ++ GF_FREE(db_param); ++ GF_FREE(db_version); ++out: ++ return ret; ++} ++ ++/* IPC Call from tier migrator to clear the heat on the DB */ ++int32_t ++ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict) ++{ ++ int ret = -1; ++ gf_ctr_private_t *priv = NULL; ++ dict_t *out_dict = NULL; ++ ++ GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); ++ GF_ASSERT(priv->_db_conn); ++ GF_VALIDATE_OR_GOTO(this->name, in_dict, wind); ++ ++ if (op != GF_IPC_TARGET_CTR) ++ goto wind; ++ ++ out_dict = dict_new(); ++ if (!out_dict) { ++ goto out; ++ } ++ ++ ret = ctr_ipc_helper(this, in_dict, out_dict); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, ++ "Failed in ctr_ipc_helper"); ++ } ++out: ++ ++ STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict); ++ ++ if (out_dict) ++ dict_unref(out_dict); ++ ++ return 0; ++ ++wind: ++ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ipc, op, in_dict); ++ ++ return 0; ++} ++ ++/* Call to initialize db for ctr xlator while ctr is enabled */ ++int32_t ++initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv) ++{ ++ int ret_db = -1; ++ dict_t *params_dict = NULL; ++ ++ if (!priv) ++ goto error; ++ ++ /* For compaction */ ++ priv->compact_active = _gf_false; ++ priv->compact_mode_switched = _gf_false; ++ ret_db = pthread_mutex_init(&priv->compact_lock, NULL); ++ ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed initializing compaction mutex"); ++ goto error; ++ } ++ ++ params_dict = dict_new(); ++ if (!params_dict) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED, ++ "DB Params cannot initialized!"); ++ goto error; ++ } ++ ++ /*Extract db params options*/ ++ ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type); ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED, ++ "Failed extracting db params 
options"); ++ goto error; ++ } ++ ++ /*Create a memory pool for ctr xlator*/ ++ this->local_pool = mem_pool_new(gf_ctr_local_t, 64); ++ if (!this->local_pool) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED, ++ "failed to create local memory pool"); ++ goto error; ++ } ++ ++ /*Initialize Database Connection*/ ++ priv->_db_conn = init_db(params_dict, priv->gfdb_db_type); ++ if (!priv->_db_conn) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed initializing data base"); ++ goto error; ++ } ++ ++ ret_db = 0; ++ goto out; ++ ++error: ++ if (this) ++ mem_pool_destroy(this->local_pool); ++ ++ if (priv) { ++ GF_FREE(priv->ctr_db_path); ++ } ++ GF_FREE(priv); ++ ret_db = -1; ++out: ++ if (params_dict) ++ dict_unref(params_dict); ++ ++ return ret_db; ++} ++ ++/******************************************************************************/ ++int ++reconfigure(xlator_t *this, dict_t *options) ++{ ++ char *temp_str = NULL; ++ int ret = 0; ++ gf_ctr_private_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (dict_get_str(options, "changetimerecorder.frequency", &temp_str)) { ++ gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set"); ++ } ++ ++ GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out); ++ if (!priv->enabled) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, ++ "CTR Xlator is not enabled so skip ctr reconfigure"); ++ goto out; ++ } ++ ++ /* If ctr is enabled after skip init for ctr xlator then call ++ initialize_ctr_resource during reconfigure phase to allocate resources ++ for xlator ++ */ ++ if (priv->enabled && !priv->_db_conn) { ++ ret = initialize_ctr_resource(this, priv); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed ctr initialize resource"); ++ goto out; ++ } ++ } ++ ++ GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool, ++ out); ++ ++ GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat, ++ options, bool, out); ++ ++ GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency, ++ options, bool, out); ++ ++ GF_OPTION_RECONF("ctr_lookupheal_inode_timeout", ++ priv->ctr_lookupheal_inode_timeout, options, uint64, out); ++ ++ GF_OPTION_RECONF("ctr_lookupheal_link_timeout", ++ priv->ctr_lookupheal_link_timeout, options, uint64, out); ++ ++ GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool, ++ out); ++ ++ GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out); ++ ++ /* If database is sqlite */ ++ if (priv->gfdb_db_type == GFDB_SQLITE3) { ++ /* AUTOCHECKPOINT */ ++ if (dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str) == ++ 0) { ++ ret = set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK); ++ } ++ } ++ ++ /* CACHE_SIZE */ ++ if (dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str) == 0) { ++ ret = set_db_params(priv->_db_conn, "cache_size", temp_str); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE); ++ } ++ } ++ } ++ ++ ret = 0; ++ ++out: ++ ++ return ret; ++} ++ ++/****************************init********************************************/ ++ ++int32_t ++init(xlator_t *this) ++{ ++ gf_ctr_private_t *priv = NULL; ++ int ret_db = -1; ++ ++ if (!this) { ++ gf_msg("ctr", 
GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: ctr this is not initialized"); ++ return -1; ++ } ++ ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: ctr should have exactly one child"); ++ return -1; ++ } ++ ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME, ++ "dangling volume. check volfile "); ++ } ++ ++ priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t); ++ if (!priv) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED, ++ "Calloc did not work!!!"); ++ return -1; ++ } ++ ++ /*Default values for the translator*/ ++ priv->ctr_record_wind = _gf_true; ++ priv->ctr_record_unwind = _gf_false; ++ priv->ctr_hot_brick = _gf_false; ++ priv->gfdb_db_type = GFDB_SQLITE3; ++ priv->gfdb_sync_type = GFDB_DB_SYNC; ++ priv->_db_conn = NULL; ++ priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD; ++ priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD; ++ ++ /*Extract ctr xlator options*/ ++ ret_db = extract_ctr_options(this, priv); ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED, ++ "Failed extracting ctr xlator options"); ++ GF_FREE(priv); ++ return -1; ++ } ++ ++ if (!priv->enabled) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, ++ "CTR Xlator is not enabled so skip ctr init"); ++ goto out; ++ } ++ ++ ret_db = initialize_ctr_resource(this, priv); ++ if (ret_db) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, ++ "FATAL: Failed ctr initialize resource"); ++ return -1; ++ } ++ ++out: ++ this->private = (void *)priv; ++ return 0; ++} ++ ++int ++notify(xlator_t *this, int event, void *data, ...) ++{ ++ gf_ctr_private_t *priv = NULL; ++ int ret = 0; ++ ++ priv = this->private; ++ ++ if (!priv) ++ goto out; ++ ++ ret = default_notify(this, event, data); ++ ++out: ++ return ret; ++} ++ ++int32_t ++mem_acct_init(xlator_t *this) ++{ ++ int ret = -1; ++ ++ GF_VALIDATE_OR_GOTO("ctr", this, out); ++ ++ ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1); ++ ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); ++ return ret; ++ } ++out: ++ return ret; ++} ++ ++void ++fini(xlator_t *this) ++{ ++ gf_ctr_private_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (priv && priv->enabled) { ++ if (fini_db(priv->_db_conn)) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED, ++ "Failed closing " ++ "db connection"); ++ } ++ ++ if (priv->_db_conn) ++ priv->_db_conn = NULL; ++ ++ GF_FREE(priv->ctr_db_path); ++ if (pthread_mutex_destroy(&priv->compact_lock)) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED, ++ "Failed to " ++ "destroy the compaction mutex"); ++ } ++ } ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; ++ ++ return; ++} ++ ++struct xlator_fops fops = { ++ /*lookup*/ ++ .lookup = ctr_lookup, ++ /*write fops */ ++ .mknod = ctr_mknod, ++ .create = ctr_create, ++ .truncate = ctr_truncate, ++ .ftruncate = ctr_ftruncate, ++ .setxattr = ctr_setxattr, ++ .fsetxattr = ctr_fsetxattr, ++ .removexattr = ctr_removexattr, ++ .fremovexattr = ctr_fremovexattr, ++ .unlink = ctr_unlink, ++ .link = ctr_link, ++ .rename = ctr_rename, ++ .writev = ctr_writev, ++ .setattr = ctr_setattr, ++ .fsetattr = ctr_fsetattr, ++ /*read fops*/ ++ .readv = ctr_readv, ++ /* IPC call*/ ++ .ipc = ctr_ipc}; ++ ++struct xlator_cbks cbks = 
{.forget = ctr_forget}; ++ ++struct volume_options options[] = { ++ {.key = ++ { ++ "ctr-enabled", ++ }, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .description = "Enables the CTR", ++ .flags = OPT_FLAG_SETTABLE}, ++ {.key = {"record-entry"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "on"}, ++ {.key = {"record-exit"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off"}, ++ {.key = {"record-counters"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .flags = OPT_FLAG_SETTABLE, ++ .tags = {}}, ++ {.key = {"ctr-record-metadata-heat"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {"ctr_link_consistency"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {"ctr_lookupheal_link_timeout"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "300", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_2}, ++ .tags = {}}, ++ {.key = {"ctr_lookupheal_inode_timeout"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "300", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_2}, ++ .tags = {}}, ++ {.key = {"hot-brick"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .value = {"on", "off"}, ++ .default_value = "off"}, ++ {.key = {"db-type"}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"}, ++ .default_value = "sqlite3", ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .flags = OPT_FLAG_SETTABLE, ++ .tags = {}}, ++ {.key = {"db-sync"}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"sync", "async"}, ++ .default_value = "sync"}, ++ {.key = {"db-path"}, .type = GF_OPTION_TYPE_PATH}, ++ {.key = {"db-name"}, .type = GF_OPTION_TYPE_STR}, ++ {.key = {GFDB_SQL_PARAM_SYNC}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"off", "normal", "full"}, ++ .default_value = "normal"}, ++ {.key = {GFDB_SQL_PARAM_JOURNAL_MODE}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"delete", "truncate", "persist", "memory", "wal", "off"}, ++ .default_value = "wal", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_AUTO_VACUUM}, ++ .type = GF_OPTION_TYPE_STR, ++ .value = {"off", "full", "incr"}, ++ .default_value = "off", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_WAL_AUTOCHECK}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "25000", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_CACHE_SIZE}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "12500", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {GFDB_SQL_PARAM_PAGE_SIZE}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "4096", ++ .flags = OPT_FLAG_SETTABLE, ++ .op_version = {GD_OP_VERSION_3_7_0}, ++ .tags = {}}, ++ {.key = {NULL}}, ++}; ++ ++xlator_api_t xlator_api = { ++ .init = init, ++ .fini = fini, ++ .notify = notify, ++ .reconfigure = reconfigure, ++ .mem_acct_init = mem_acct_init, ++ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */ ++ .fops = &fops, ++ .cbks = &cbks, ++ 
.identifier = "changetimerecorder", ++ .category = GF_MAINTAINED, ++ .options = options, ++}; +diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h +new file mode 100644 +index 0000000..0150a1c +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/changetimerecorder.h +@@ -0,0 +1,21 @@ ++/* ++ Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __CTR_H ++#define __CTR_H ++ ++#include <glusterfs/glusterfs.h> ++#include <glusterfs/xlator.h> ++#include <glusterfs/logging.h> ++#include <glusterfs/common-utils.h> ++#include "ctr_mem_types.h" ++#include "ctr-helper.h" ++ ++#endif /* __CTR_H */ +diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c +new file mode 100644 +index 0000000..e1e6573 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-helper.c +@@ -0,0 +1,293 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include "gfdb_sqlite3.h" ++#include "ctr-helper.h" ++#include "ctr-messages.h" ++ ++/******************************************************************************* ++ * ++ * Fill unwind into db record ++ * ++ ******************************************************************************/ ++int ++fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local, ++ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path) ++{ ++ int ret = -1; ++ gfdb_time_t *ctr_uwtime = NULL; ++ gf_ctr_private_t *_priv = NULL; ++ ++ GF_ASSERT(this); ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ ++ GF_ASSERT(ctr_local); ++ ++ /*If not unwind path error*/ ++ if (!isunwindpath(fop_path)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, ++ "Wrong fop_path. 
Should be unwind"); ++ goto out; ++ } ++ ++ ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time; ++ CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path; ++ CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type; ++ ++ ret = gettimeofday(ctr_uwtime, NULL); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, ++ "Error " ++ "filling unwind time record %s", ++ strerror(errno)); ++ goto out; ++ } ++ ++ /* Special case i.e if its a tier rebalance ++ * + cold tier brick ++ * + its a create/mknod FOP ++ * we record unwind time as zero */ ++ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG && ++ (!_priv->ctr_hot_brick) && isdentrycreatefop(fop_type)) { ++ memset(ctr_uwtime, 0, sizeof(*ctr_uwtime)); ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++/******************************************************************************* ++ * ++ * Fill wind into db record ++ * ++ ******************************************************************************/ ++int ++fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local, ++ gf_ctr_inode_context_t *ctr_inode_cx) ++{ ++ int ret = -1; ++ gfdb_time_t *ctr_wtime = NULL; ++ gf_ctr_private_t *_priv = NULL; ++ ++ GF_ASSERT(this); ++ _priv = this->private; ++ GF_ASSERT(_priv); ++ GF_ASSERT(ctr_local); ++ IS_CTR_INODE_CX_SANE(ctr_inode_cx); ++ ++ /*if not wind path error!*/ ++ if (!iswindpath(ctr_inode_cx->fop_path)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, ++ "Wrong fop_path. Should be wind"); ++ goto out; ++ } ++ ++ ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time; ++ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; ++ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; ++ CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency; ++ ++ ret = gettimeofday(ctr_wtime, NULL); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, ++ "Error filling wind time record %s", strerror(errno)); ++ goto out; ++ } ++ ++ /* Special case i.e if its a tier rebalance ++ * + cold tier brick ++ * + its a create/mknod FOP ++ * we record wind time as zero */ ++ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG && ++ (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) { ++ memset(ctr_wtime, 0, sizeof(*ctr_wtime)); ++ } ++ ++ /* Copy gfid into db record */ ++ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid)); ++ ++ /* Copy older gfid if any */ ++ if (ctr_inode_cx->old_gfid && ++ (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) { ++ gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid)); ++ } ++ ++ /*Hard Links*/ ++ if (isdentryfop(ctr_inode_cx->fop_type)) { ++ /*new link fop*/ ++ if (NEW_LINK_CX(ctr_inode_cx)) { ++ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid, ++ *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); ++ strcpy(CTR_DB_REC(ctr_local).file_name, ++ NEW_LINK_CX(ctr_inode_cx)->basename); ++ } ++ /*rename fop*/ ++ if (OLD_LINK_CX(ctr_inode_cx)) { ++ gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid, ++ *((OLD_LINK_CX(ctr_inode_cx))->pargfid)); ++ strcpy(CTR_DB_REC(ctr_local).old_file_name, ++ OLD_LINK_CX(ctr_inode_cx)->basename); ++ } ++ } ++ ++ ret = 0; ++out: ++ /*On error roll back and clean the record*/ ++ if (ret == -1) { ++ CLEAR_CTR_DB_RECORD(ctr_local); ++ } ++ return ret; ++} ++ ++/****************************************************************************** ++ * ++ * CTR xlator init related functions ++ * ++ * ++ * 
****************************************************************************/ ++static int ++extract_sql_params(xlator_t *this, dict_t *params_dict) ++{ ++ int ret = -1; ++ char *db_path = NULL; ++ char *db_name = NULL; ++ char *db_full_path = NULL; ++ ++ GF_ASSERT(this); ++ GF_ASSERT(params_dict); ++ ++ /*Extract the path of the db*/ ++ db_path = NULL; ++ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path", ++ db_path, "/var/run/gluster/"); ++ ++ /*Extract the name of the db*/ ++ db_name = NULL; ++ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name", ++ db_name, "gf_ctr_db.db"); ++ ++ /*Construct full path of the db*/ ++ ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name); ++ if (ret < 0) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, ++ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, ++ "Construction of full db path failed!"); ++ goto out; ++ } ++ ++ /*Setting the SQL DB Path*/ ++ SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH, ++ db_full_path, ret, out); ++ ++ /*Extract rest of the sql params*/ ++ ret = gfdb_set_sql_params(this->name, this->options, params_dict); ++ if (ret) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, ++ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, ++ "Failed setting values to sql param dict!"); ++ } ++ ++ ret = 0; ++ ++out: ++ if (ret) ++ GF_FREE(db_full_path); ++ return ret; ++} ++ ++int ++extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type) ++{ ++ int ret = -1; ++ ++ GF_ASSERT(this); ++ GF_ASSERT(params_dict); ++ ++ switch (db_type) { ++ case GFDB_SQLITE3: ++ ret = extract_sql_params(this, params_dict); ++ if (ret) ++ goto out; ++ break; ++ case GFDB_ROCKS_DB: ++ case GFDB_HYPERDEX: ++ case GFDB_HASH_FILE_STORE: ++ case GFDB_INVALID_DB: ++ case GFDB_DB_END: ++ goto out; ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++int ++extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv) ++{ ++ int ret = -1; ++ char *_val_str = NULL; ++ ++ GF_ASSERT(this); ++ GF_ASSERT(_priv); ++ ++ /*Checking if the CTR Translator is enabled. 
By default its disabled*/ ++ _priv->enabled = _gf_false; ++ GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out); ++ if (!_priv->enabled) { ++ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, ++ "CTR Xlator is disabled."); ++ ret = 0; ++ goto out; ++ } ++ ++ /*Extract db type*/ ++ GF_OPTION_INIT("db-type", _val_str, str, out); ++ _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str); ++ ++ /*Extract flag for record on wind*/ ++ GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out); ++ ++ /*Extract flag for record on unwind*/ ++ GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out); ++ ++ /*Extract flag for record on counters*/ ++ GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out); ++ ++ /* Extract flag for record metadata heat */ ++ GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat, ++ bool, out); ++ ++ /*Extract flag for link consistency*/ ++ GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool, ++ out); ++ ++ /*Extract ctr_lookupheal_inode_timeout */ ++ GF_OPTION_INIT("ctr_lookupheal_inode_timeout", ++ _priv->ctr_lookupheal_inode_timeout, uint64, out); ++ ++ /*Extract ctr_lookupheal_link_timeout*/ ++ GF_OPTION_INIT("ctr_lookupheal_link_timeout", ++ _priv->ctr_lookupheal_link_timeout, uint64, out); ++ ++ /*Extract flag for hot tier brick*/ ++ GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out); ++ ++ /*Extract flag for sync mode*/ ++ GF_OPTION_INIT("db-sync", _val_str, str, out); ++ _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str); ++ ++ ret = 0; ++ ++out: ++ return ret; ++} +diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h +new file mode 100644 +index 0000000..517fbb0 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-helper.h +@@ -0,0 +1,854 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/
++
++#ifndef __CTR_HELPER_H
++#define __CTR_HELPER_H
++
++#include <glusterfs/xlator.h>
++#include "ctr_mem_types.h"
++#include <glusterfs/iatt.h>
++#include <glusterfs/glusterfs.h>
++#include <glusterfs/xlator.h>
++#include <glusterfs/defaults.h>
++#include <glusterfs/logging.h>
++#include <glusterfs/common-utils.h>
++#include <time.h>
++#include <sys/time.h>
++#include <pthread.h>
++
++#include "gfdb_data_store.h"
++#include "ctr-xlator-ctx.h"
++#include "ctr-messages.h"
++
++#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */
++#define CTR_DEFAULT_INODE_EXP_PERIOD 300    /* Five mins */
++
++typedef struct ctr_query_cbk_args {
++    int query_fd;
++    int count;
++} ctr_query_cbk_args_t;
++
++/* CTR xlator private structure */
++typedef struct gf_ctr_private {
++    gf_boolean_t enabled;
++    char *ctr_db_path;
++    gf_boolean_t ctr_hot_brick;
++    gf_boolean_t ctr_record_wind;
++    gf_boolean_t ctr_record_unwind;
++    gf_boolean_t ctr_record_counter;
++    gf_boolean_t ctr_record_metadata_heat;
++    gf_boolean_t ctr_link_consistency;
++    gfdb_db_type_t gfdb_db_type;
++    gfdb_sync_type_t gfdb_sync_type;
++    gfdb_conn_node_t *_db_conn;
++    uint64_t ctr_lookupheal_link_timeout;
++    uint64_t ctr_lookupheal_inode_timeout;
++    gf_boolean_t compact_active;
++    gf_boolean_t compact_mode_switched;
++    pthread_mutex_t compact_lock;
++} gf_ctr_private_t;
++
++/*
++ * gf_ctr_local_t is the ctr xlator local data structure that is stored in
++ * the call_frame of each FOP.
++ *
++ * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is
++ * used by the insert_record() api from libgfdb. The gfdb_db_record object
++ * will contain all the inode and hardlink information (hardlinks only for
++ * dentry fops: create, mknod, link, unlink, rename). The ctr_local is kept
++ * alive till the unwind call and is released during the unwind. The same
++ * gfdb_db_record is used by the unwind insert_record() api, to record the
++ * unwind in the database.
++ *
++ * ia_inode_type in gf_ctr_local tells the type of the inode. This is
++ * important during the unwind path, as we will not have the inode there.
++ * We would have included this in the gfdb_db_record itself, but currently
++ * we record only file inode information.
++ *
++ * is_internal_fop in gf_ctr_local tells us if this is an internal fop, for
++ * which special/no action is taken. We don't record change/access times or
++ * increment the heat counter for internal fops from the rebalancer.
++ * */
++typedef struct gf_ctr_local {
++    gfdb_db_record_t gfdb_db_record;
++    ia_type_t ia_inode_type;
++    gf_boolean_t is_internal_fop;
++    gf_special_pid_t client_pid;
++} gf_ctr_local_t;
++/*
++ * Easy access of gfdb_db_record of ctr_local
++ * */
++#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record)
++
++/* Clear db record */
++#define CLEAR_CTR_DB_RECORD(ctr_local)                                         \
++    do {                                                                       \
++        ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID;            \
++        memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time), 0,          \
++               sizeof(gfdb_time_t));                                           \
++        memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time), 0,        \
++               sizeof(gfdb_time_t));                                           \
++        gf_uuid_clear(ctr_local->gfdb_db_record.gfid);                         \
++        gf_uuid_clear(ctr_local->gfdb_db_record.pargfid);                      \
++        memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1);       \
++        memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1);   \
++        ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP;         \
++        ctr_local->ia_inode_type = IA_INVAL;                                   \
++    } while (0)
++
++static gf_ctr_local_t *
++init_ctr_local_t(xlator_t *this)
++{
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(this);
++
++    ctr_local = mem_get0(this->local_pool);
++    if (!ctr_local) {
++        gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0,
++               CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++               "Error while creating ctr local");
++        goto out;
++    }
++
++    CLEAR_CTR_DB_RECORD(ctr_local);
++out:
++    return ctr_local;
++}
++
++static void
++free_ctr_local(gf_ctr_local_t *ctr_local)
++{
++    if (ctr_local)
++        mem_put(ctr_local);
++}
++
++/******************************************************************************
++ *
++ *
++ * Context Carrier Structures
++ *
++ *
++ * ****************************************************************************/
++
++/*
++ * Context Carrier structures are used to carry relevant information about
++ * inodes and links from the fop calls to ctr_insert_wind.
++ * These structures just hold pointers to the original data and do not
++ * deep-copy any data. This info is deep copied into
++ * ctr_local->gfdb_db_record and passed to the insert_record() api of libgfdb.
++ * The info remains persistent for the unwind in ctr_local->gfdb_db_record,
++ * and once used it is destroyed.
++ *
++ * gf_ctr_link_context_t : Context structure for hard links
++ * gf_ctr_inode_context_t : Context structure for inodes
++ *
++ * */
++
++/*Context Carrier Structure for hard links*/
++typedef struct gf_ctr_link_context {
++    uuid_t *pargfid;
++    const char *basename;
++} gf_ctr_link_context_t;
++
++/*Context Carrier Structure for inodes*/
++typedef struct gf_ctr_inode_context {
++    ia_type_t ia_type;
++    uuid_t *gfid;
++    uuid_t *old_gfid;
++    gf_ctr_link_context_t *new_link_cx;
++    gf_ctr_link_context_t *old_link_cx;
++    gfdb_fop_type_t fop_type;
++    gfdb_fop_path_t fop_path;
++    gf_boolean_t is_internal_fop;
++    /* Indicating metadata fops */
++    gf_boolean_t is_metadata_fop;
++} gf_ctr_inode_context_t;
++
++/*******************Util Macros for Context Carrier Structures*****************/
++
++/*Checks if ctr_link_cx is sane!*/
++#define IS_CTR_LINK_CX_SANE(ctr_link_cx)                                       \
++    do {                                                                       \
++        if (ctr_link_cx) {                                                     \
++            if (ctr_link_cx->pargfid)                                          \
++                GF_ASSERT(*(ctr_link_cx->pargfid));                            \
++            GF_ASSERT(ctr_link_cx->basename);                                  \
++        };                                                                     \
++    } while (0)
++
++/*Clear and fill the ctr_link_context with values*/
++#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label)              \
++    do {                                                                       \
++        GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label);                        \
++        GF_VALIDATE_OR_GOTO("ctr", _pargfid, label);                           \
++        GF_VALIDATE_OR_GOTO("ctr", _basename, label);                          \
++        memset(ctr_link_cx, 0, sizeof(*ctr_link_cx));                          \
++        ctr_link_cx->pargfid = &_pargfid;                                      \
++        ctr_link_cx->basename = _basename;                                     \
++    } while (0)
++
++#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx
++
++#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx
++
++/*Checks if ctr_inode_cx is sane!*/
++#define IS_CTR_INODE_CX_SANE(ctr_inode_cx)                                     \
++    do {                                                                       \
++        GF_ASSERT(ctr_inode_cx);                                               \
++        GF_ASSERT(ctr_inode_cx->gfid);                                         \
++        GF_ASSERT(*(ctr_inode_cx->gfid));                                      \
++        GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP);              \
++        GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID);                 \
++        IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx));                        \
++        IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx));                        \
++    } while (0)
++
++/*Clear and fill the ctr_inode_context with values*/
++#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx,    \
++                               _old_link_cx, _fop_type, _fop_path)             \
++    do {                                                                       \
++        GF_ASSERT(ctr_inode_cx);                                               \
++        GF_ASSERT(_gfid);                                                      \
++        GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP);                           \
++        GF_ASSERT(_fop_path != GFDB_FOP_INVALID);                              \
++        memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx));                        \
++        ctr_inode_cx->ia_type = _ia_type;                                      \
++        ctr_inode_cx->gfid = &_gfid;                                           \
++        IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx));                        \
++        if (_new_link_cx)                                                      \
++            NEW_LINK_CX(ctr_inode_cx) = _new_link_cx;                          \
++        IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx));                        \
++        if (_old_link_cx)                                                      \
++            OLD_LINK_CX(ctr_inode_cx) = _old_link_cx;                          \
++        ctr_inode_cx->fop_type = _fop_type;                                    \
++        ctr_inode_cx->fop_path = _fop_path;                                    \
++    } while (0)
++
++/******************************************************************************
++ *
++ * Util functions or macros used by
++ * insert wind and insert unwind
++ *
++ * ****************************************************************************/
++/* Free ctr frame local */
++static inline void
++ctr_free_frame_local(call_frame_t *frame)
++{
++    if (frame) {
++        free_ctr_local((gf_ctr_local_t *)frame->local);
++        frame->local = NULL;
++    }
++}
++
++/* Setting GF_REQUEST_LINK_COUNT_XDATA in the dict
++ * that has to be sent to the POSIX xlator, to get
++ * the link count back in the unwind path.
++ * return 0 for success without creation of the dict
++ * return 1 for success with creation of the dict
++ * return -1 for failure.
++ * */
++static inline int
++set_posix_link_request(xlator_t *this, dict_t **xdata)
++{
++    int ret = -1;
++    gf_boolean_t is_created = _gf_false;
++
++    GF_VALIDATE_OR_GOTO("ctr", this, out);
++    GF_VALIDATE_OR_GOTO(this->name, xdata, out);
++
++    /* create xdata if NULL */
++    if (!*xdata) {
++        *xdata = dict_new();
++        is_created = _gf_true;
++        ret = 1;
++    } else {
++        ret = 0;
++    }
++
++    if (!*xdata) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL,
++               "xdata is NULL: Cannot send "
++               "GF_REQUEST_LINK_COUNT_XDATA to posix");
++        ret = -1;
++        goto out;
++    }
++
++    ret = dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0,
++               CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++               "Failed setting GF_REQUEST_LINK_COUNT_XDATA");
++        ret = -1;
++        goto out;
++    }
++    ret = 0;
++out:
++    if (ret == -1) {
++        if (*xdata && is_created) {
++            dict_unref(*xdata);
++        }
++    }
++    return ret;
++}
++
++/*
++ * If a bitrot fop
++ * */
++#define BITROT_FOP(frame)                                                      \
++    (frame->root->pid == GF_CLIENT_PID_BITD ||                                 \
++     frame->root->pid == GF_CLIENT_PID_SCRUB)
++
++/*
++ * If a rebalancer fop
++ * */
++#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG)
++
++/*
++ * If it's a tiering rebalancer fop
++ * */
++#define TIER_REBALANCE_FOP(frame)                                              \
++    (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
++
++/*
++ * If it's an AFR SELF HEAL fop
++ * */
++#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD)
++
++/*
++ * if a rebalancer fop then goto
++ * */
++#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label)                           \
++    do {                                                                       \
++        if (REBALANCE_FOP(frame))                                              \
++            goto label;                                                        \
++    } while (0)
++
++/*
++ * Internal fop
++ *
++ * */
++static inline gf_boolean_t
++is_internal_fop(call_frame_t *frame, dict_t *xdata)
++{
++    gf_boolean_t ret = _gf_false;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(frame->root);
++
++    if (AFR_SELF_HEAL_FOP(frame)) {
++        ret = _gf_true;
++    }
++    if (BITROT_FOP(frame)) {
++        ret = _gf_true;
++    }
++    if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) {
++        ret = _gf_true;
++        if (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP)) {
++            ret = _gf_false;
++        }
++    }
++    if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
++        ret = _gf_true;
++    }
++
++    return ret;
++}
++
++#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label)                      \
++    do {                                                                       \
++        if (is_internal_fop(frame, dict))                                      \
++            goto label;                                                        \
++    } while (0)
++
++/* if the fop has failed, exit */
++#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label)             \
++    do {                                                                       \
++        if (op_ret == -1) {                                                    \
++            gf_msg_trace(this->name, 0, "Failed fop with %s",                  \
++                         strerror(op_errno));                                  \
++            goto label;                                                        \
++        };                                                                     \
++    } while (0)
++
++/*
++ * If the CTR xlator is disabled then goto label
++ * */
++#define CTR_IS_DISABLED_THEN_GOTO(this, label)                                 \
++    do {                                                                       \
++        gf_ctr_private_t *_priv = NULL;                                        \
++        GF_ASSERT(this);                                                       \
++        GF_ASSERT(this->private);                                              \
++        _priv = this->private;                                                 \
++        if (!_priv->_db_conn)                                                  \
++            goto label;                                                        \
++    } while (0)
++
++/*
++ * If CTR record metadata heat is disabled then goto label
++ * */
++#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label)            \
++    do {                                                                       \
++        gf_ctr_private_t *_priv = NULL;                                        \
++        GF_ASSERT(this);                                                       \
++        GF_ASSERT(this->private);                                              \
++        _priv = this->private;                                                 \
++        if (!_priv->ctr_record_metadata_heat)                                  \
++            goto label;                                                        \
++    } while (0)
++
++int
++fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local,
++                          gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path);
++
++int
++fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local,
++                        gf_ctr_inode_context_t *ctr_inode_cx);
++
++/*******************************************************************************
++ * CTR INSERT WIND
++ * *****************************************************************************
++ * Function used to insert/update a record in the database during a wind fop.
++ * This function creates the ctr_local structure in the frame of the fop
++ * call.
++ * ****************************************************************************/
++
++static inline int
++ctr_insert_wind(call_frame_t *frame, xlator_t *this,
++                gf_ctr_inode_context_t *ctr_inode_cx)
++{
++    int ret = -1;
++    gf_ctr_private_t *_priv = NULL;
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(frame->root);
++    GF_ASSERT(this);
++    IS_CTR_INODE_CX_SANE(ctr_inode_cx);
++
++    _priv = this->private;
++    GF_ASSERT(_priv);
++
++    GF_ASSERT(_priv->_db_conn);
++
++    /* If the record_wind option of CTR is on, record the wind for
++     * regular files only */
++    if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) {
++        frame->local = init_ctr_local_t(this);
++        if (!frame->local) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++                   "WIND: Error while creating ctr local");
++            goto out;
++        }
++        ctr_local = frame->local;
++        ctr_local->client_pid = frame->root->pid;
++        ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop;
++
++        /* Decide whether to record counters or not */
++        CTR_DB_REC(ctr_local).do_record_counters = _gf_false;
++        /* If record counter is enabled */
++        if (_priv->ctr_record_counter) {
++            /* If not an internal fop */
++            if (!(ctr_local->is_internal_fop)) {
++                /* If it's a metadata fop AND
++                 * we record metadata heat,
++                 * OR
++                 * it's NOT a metadata fop */
++                if ((ctr_inode_cx->is_metadata_fop &&
++                     _priv->ctr_record_metadata_heat) ||
++                    (!ctr_inode_cx->is_metadata_fop)) {
++                    CTR_DB_REC(ctr_local).do_record_counters = _gf_true;
++                }
++            }
++        }
++
++        /* Decide whether to record times or not.
++         * For non-internal FOPs record times as usual */
++        CTR_DB_REC(ctr_local).do_record_times = _gf_false;
++        if (!ctr_local->is_internal_fop) {
++            /* If it's a metadata fop AND
++             * we record metadata heat,
++             * OR
++             * it's NOT a metadata fop */
++            if ((ctr_inode_cx->is_metadata_fop &&
++                 _priv->ctr_record_metadata_heat) ||
++                (!ctr_inode_cx->is_metadata_fop)) {
++                CTR_DB_REC(ctr_local).do_record_times =
++                    (_priv->ctr_record_wind || _priv->ctr_record_unwind);
++            }
++        }
++        /* when it's an internal FOP */
++        else {
++            /* Record times only for create,
++             * i.e., when the inode is created */
++            CTR_DB_REC(ctr_local).do_record_times = (isdentrycreatefop(
++                                                        ctr_inode_cx->fop_type))
++                                                        ? _gf_true
++                                                        : _gf_false;
++        }
++
++        /* Fill the db record for insertion */
++        ret = fill_db_record_for_wind(this, ctr_local, ctr_inode_cx);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
++                   "WIND: Error filling ctr local");
++            goto out;
++        }
++
++        /* Insert the db record */
++        ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_INSERT_RECORD_WIND_FAILED,
++                   "WIND: Inserting of record failed!");
++            goto out;
++        }
++    }
++    ret = 0;
++out:
++
++    if (ret) {
++        free_ctr_local(ctr_local);
++        frame->local = NULL;
++    }
++
++    return ret;
++}
++
++/*******************************************************************************
++ * CTR INSERT UNWIND
++ * *****************************************************************************
++ * Function used to insert/update a record in the database during an unwind
++ * fop. This function destroys the ctr_local structure in the frame of the
++ * fop call at the end.
++ * ****************************************************************************/
++static inline int
++ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type,
++                  gfdb_fop_path_t fop_path)
++{
++    int ret = -1;
++    gf_ctr_private_t *_priv = NULL;
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(this);
++
++    _priv = this->private;
++    GF_ASSERT(_priv);
++
++    GF_ASSERT(_priv->_db_conn);
++
++    ctr_local = frame->local;
++
++    if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) &&
++        (ctr_local->ia_inode_type != IA_IFDIR)) {
++        CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind;
++
++        ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path);
++        if (ret == -1) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++                   "UNWIND: Error filling ctr local");
++            goto out;
++        }
++
++        ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record);
++        if (ret == -1) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND,
++                   "UNWIND: Error inserting the record");
++            goto out;
++        }
++    }
++    ret = 0;
++out:
++    return ret;
++}
++
++/******************************************************************************
++ * Delete file/flink record/s from db
++ * ****************************************************************************/
++static inline int
++ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid,
++                             char *basename, gfdb_fop_type_t fop_type,
++                             gfdb_fop_path_t fop_path)
++{
++    int ret = -1;
++    gfdb_db_record_t gfdb_db_record;
++    gf_ctr_private_t *_priv = NULL;
++
++    _priv = this->private;
++    GF_VALIDATE_OR_GOTO(this->name, _priv, out);
++    GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out);
++    GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out);
++    GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out);
++    GF_VALIDATE_OR_GOTO(this->name,
++                        (fop_path == GFDB_FOP_UNDEL ||
++                         fop_path == GFDB_FOP_UNDEL_ALL),
++                        out);
++
++    /* Set gfdb_db_record to 0 */
++    memset(&gfdb_db_record, 0, sizeof(gfdb_db_record));
++
++    /* Copy basename */
++    if (snprintf(gfdb_db_record.file_name, GF_NAME_MAX, "%s", basename) >=
++        GF_NAME_MAX)
++        goto out;
++
++    /* Copy gfid into db record */
++    gf_uuid_copy(gfdb_db_record.gfid, gfid);
++
++    /* Copy pargfid into db record */
++    gf_uuid_copy(gfdb_db_record.pargfid, pargfid);
++
++    gfdb_db_record.gfdb_fop_path = fop_path;
++    gfdb_db_record.gfdb_fop_type = fop_type;
++
++    /* send delete request to db */
++    ret = insert_record(_priv->_db_conn, &gfdb_db_record);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED,
++               "Failed to delete record. %s", basename);
++        goto out;
++    }
++
++    ret = 0;
++out:
++    return ret;
++}
++
++/******************************* Hard link function ***************************/
++
++static inline gf_boolean_t
++__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv,
++                   gfdb_time_t *current_time)
++{
++    gf_boolean_t ret = _gf_false;
++    uint64_t time_diff = 0;
++
++    GF_ASSERT(ctr_xlator_ctx);
++    GF_ASSERT(_priv);
++    GF_ASSERT(current_time);
++
++    time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period;
++
++    ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true
++                                                             : _gf_false;
++    return ret;
++}
++
++static inline gf_boolean_t
++__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv,
++                      gfdb_time_t *current_time)
++{
++    gf_boolean_t ret = _gf_false;
++    uint64_t time_diff = 0;
++
++    GF_ASSERT(ctr_hard_link);
++    GF_ASSERT(_priv);
++    GF_ASSERT(current_time);
++
++    time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period;
++
++    ret = (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true
++                                                            : _gf_false;
++
++    return ret;
++}
++
++/* Return values of heal */
++typedef enum ctr_heal_ret_val {
++    CTR_CTX_ERROR = -1,
++    /* No healing required */
++    CTR_TRY_NO_HEAL = 0,
++    /* Try healing hard link */
++    CTR_TRY_HARDLINK_HEAL = 1,
++    /* Try healing inode */
++    CTR_TRY_INODE_HEAL = 2,
++} ctr_heal_ret_val_t;
++
++/**
++ * @brief Function to add a hard link to the inode context variable.
++ *        The inode context maintains an in-memory list, which is used
++ *        for smart healing of the database.
++ * @param frame of the FOP
++ * @param this is the xlator instance
++ * @param inode
++ * @return Return ctr_heal_ret_val_t
++ */
++
++static inline ctr_heal_ret_val_t
++add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL;
++    int ret = -1;
++    gf_ctr_local_t *ctr_local = NULL;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    gf_ctr_private_t *_priv = NULL;
++    gfdb_time_t current_time = {0};
++
++    GF_ASSERT(frame);
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++    GF_ASSERT(this->private);
++
++    _priv = this->private;
++
++    ctr_local = frame->local;
++    if (!ctr_local) {
++        goto out;
++    }
++
++    ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
++    if (!ctr_xlator_ctx) {
++        gf_msg(this->name, GF_LOG_ERROR, 0,
++               CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
++               "Failed accessing ctr inode context");
++        goto out;
++    }
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    /* Check if the hard link already exists
++     * in the ctr inode context */
++    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx,
++                                             CTR_DB_REC(ctr_local).pargfid,
++                                             CTR_DB_REC(ctr_local).file_name);
++    /* if it is there then ignore */
++    if (ctr_hard_link) {
++        ret = gettimeofday(&current_time, NULL);
++        if (ret == -1) {
++            gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++            ret_val = CTR_CTX_ERROR;
++            goto unlock;
++        }
++
++        if (__is_hardlink_expired(ctr_hard_link, _priv, &current_time)) {
++            ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++            ret_val = ret_val | CTR_TRY_HARDLINK_HEAL;
++        }
++
++        if (__is_inode_expired(ctr_xlator_ctx, _priv, &current_time)) {
++            ctr_xlator_ctx->inode_heal_period = current_time.tv_sec;
++            ret_val = ret_val | CTR_TRY_INODE_HEAL;
++        }
++
++        goto unlock;
++    }
++
++    /* Add the hard link to the list */
++    ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
++                            CTR_DB_REC(ctr_local).file_name);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0,
++               CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED,
++               "Failed to add hardlink to the ctr inode context");
++        ret_val = CTR_CTX_ERROR;
++        goto unlock;
++    }
++
++    ret_val = CTR_TRY_NO_HEAL;
++unlock:
++    UNLOCK(&ctr_xlator_ctx->lock);
++out:
++    return ret_val;
++}
++
++static inline int
++delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    int ret = -1;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++
++    ctr_local = frame->local;
++    if (!ctr_local) {
++        goto out;
++    }
++
++    ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode);
++    if (!ctr_xlator_ctx) {
++        /* Since there is no ctr inode context, there is nothing more to do */
++        ret = 0;
++        goto out;
++    }
++
++    ret = ctr_delete_hard_link(this, ctr_xlator_ctx,
++                               CTR_DB_REC(ctr_local).pargfid,
++                               CTR_DB_REC(ctr_local).file_name);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED,
++               "Failed to delete hard link");
++        goto out;
++    }
++
++    ret = 0;
++
++out:
++    return ret;
++}
++
++static inline int
++update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode)
++{
++    int ret = -1;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++    gf_ctr_local_t *ctr_local = NULL;
++
++    GF_ASSERT(frame);
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++
++    ctr_local = frame->local;
++    if (!ctr_local) {
++        goto out;
++    }
++
++    ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode);
++    if (!ctr_xlator_ctx) {
++        gf_msg(this->name, GF_LOG_ERROR, 0,
++               CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED,
++               "Failed accessing ctr inode context");
++        goto out;
++    }
++
++    ret = ctr_update_hard_link(
++        this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid,
++        CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid,
++        CTR_DB_REC(ctr_local).old_file_name);
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED,
++               "Failed to update hard link");
++        goto out;
++    }
++
++    ret = 0;
++
++out:
++    return ret;
++}
++
++/******************************************************************************
++ *
++ * CTR xlator init related functions
++ *
++ *
++ * ****************************************************************************/
++int
++extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type);
++
++int
++extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv);
++
++#endif
+diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h
+new file mode 100644
+index 0000000..23adf0a
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-messages.h
+@@ -0,0 +1,61 @@
++/*
++  Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
++  This file is part of GlusterFS.
++
++  This file is licensed to you under your choice of the GNU Lesser
++  General Public License, version 3 or any later version (LGPLv3 or
++  later), or the GNU General Public License, version 2 (GPLv2), in all
++  cases as published by the Free Software Foundation.
++ */
++
++#ifndef _CTR_MESSAGES_H_
++#define _CTR_MESSAGES_H_
++
++#include <glusterfs/glfs-message-id.h>
++
++/* To add new message IDs, append new identifiers at the end of the list.
++ *
++ * Never remove a message ID. If it's not used anymore, you can rename it or
++ * leave it as it is, but not delete it. This is to prevent reutilization of
++ * IDs by other messages.
++ *
++ * The component name must match one of the entries defined in
++ * glfs-message-id.h.
++ */
++
++GLFS_MSGID(
++    CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND,
++    CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND,
++    CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED,
++    CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED,
++    CTR_MSG_INSERT_SETATTR_UNWIND_FAILED,
++    CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED,
++    CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED,
++    CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED,
++    CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED,
++    CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED,
++    CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED,
++    CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED,
++    CTR_MSG_INSERT_RENAME_UNWIND_FAILED,
++    CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED,
++    CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED,
++    CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++    CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED,
++    CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED,
++    CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED,
++    CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED,
++    CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED,
++    CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED,
++    CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED,
++    CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME,
++    CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED,
++    CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED,
++    CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED,
++    CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH,
++    CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED,
++    CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST,
++    CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED,
++    CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED,
++    CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL);
++
++#endif /* !_CTR_MESSAGES_H_ */
+diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+new file mode 100644
+index 0000000..b6b66d5
+--- /dev/null
++++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c
+@@ -0,0 +1,362 @@
++/*
++  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
++  This file is part of GlusterFS.
++
++  This file is licensed to you under your choice of the GNU Lesser
++  General Public License, version 3 or any later version (LGPLv3 or
++  later), or the GNU General Public License, version 2 (GPLv2), in all
++  cases as published by the Free Software Foundation.
++*/
++
++#include "ctr-xlator-ctx.h"
++#include "ctr-messages.h"
++#include <time.h>
++#include <sys/time.h>
++
++#define IS_THE_ONLY_HARDLINK(ctr_hard_link)                                    \
++    (ctr_hard_link->list.next == ctr_hard_link->list.prev)
++
++static void
++fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link)
++{
++    GF_ASSERT(ctr_hard_link);
++
++    if (!*ctr_hard_link)
++        return;
++    GF_FREE((*ctr_hard_link)->base_name);
++    GF_FREE(*ctr_hard_link);
++    *ctr_hard_link = NULL;
++}
++
++/* Please lock the ctr_xlator_ctx before using this function */
++ctr_hard_link_t *
++ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                         uuid_t pgfid, const char *base_name)
++{
++    ctr_hard_link_t *_hard_link = NULL;
++    ctr_hard_link_t *searched_hardlink = NULL;
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    if (pgfid == NULL || base_name == NULL)
++        goto out;
++
++    /* linear search */
++    list_for_each_entry(_hard_link, &ctr_xlator_ctx->hardlink_list, list)
++    {
++        if (gf_uuid_compare(_hard_link->pgfid, pgfid) == 0 &&
++            _hard_link->base_name &&
++            strcmp(_hard_link->base_name, base_name) == 0) {
++            searched_hardlink = _hard_link;
++            break;
++        }
++    }
++
++out:
++    return searched_hardlink;
++}
++
++/* Please lock the ctr_xlator_ctx before using this function */
++int
++ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                  uuid_t pgfid, const char *base_name)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    struct timeval current_time = {0};
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    if (pgfid == NULL || base_name == NULL)
++        goto out;
++
++    ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t);
++    if (!ctr_hard_link) {
++        gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED,
++               "Failed allocating "
++               "ctr_hard_link");
++        goto out;
++    }
++
++    /* Initialize the ctr_hard_link object and
++     * assign the values: parent GFID and basename */
++    INIT_LIST_HEAD(&ctr_hard_link->list);
++    gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
++    ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
++               "Failed copying basename "
++               "to ctr_hard_link");
++        goto error;
++    }
++
++    ret = gettimeofday(&current_time, NULL);
++    if (ret == -1) {
++        gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++        goto error;
++    }
++
++    /* Add the hard link to the list */
++    list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list);
++
++    ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++
++    /* aal izz well! */
++    ret = 0;
++    goto out;
++error:
++    GF_FREE(ctr_hard_link);
++out:
++    return ret;
++}
++
++static void
++__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link)
++{
++    GF_ASSERT(ctr_hard_link);
++    GF_ASSERT(*ctr_hard_link);
++
++    /* Remove hard link from list */
++    list_del(&(*ctr_hard_link)->list);
++    fini_ctr_hard_link(ctr_hard_link);
++}
++
++int
++ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                     uuid_t pgfid, const char *base_name)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    /* Check if the hard link is present */
++    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid,
++                                             base_name);
++    if (!ctr_hard_link) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_HARDLINK_MISSING_IN_LIST,
++               "Hard link doesn't exist in the list");
++        goto out;
++    }
++
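++    /* Hard link found: unlink it from the inode's hard-link list and free
++     * it; the ctr_xlator_ctx lock taken above is held until the unlock at
++     * the end of this function. */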
++    __delete_hard_link_from_list(&ctr_hard_link);
++    ctr_hard_link = NULL;
++
++    ret = 0;
++out:
++    UNLOCK(&ctr_xlator_ctx->lock);
++
++    return ret;
++}
++
++int
++ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx,
++                     uuid_t pgfid, const char *base_name, uuid_t old_pgfid,
++                     const char *old_base_name)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    struct timeval current_time = {0};
++
++    GF_ASSERT(this);
++    GF_ASSERT(ctr_xlator_ctx);
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    /* Check if the hard link is present */
++    ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid,
++                                             old_base_name);
++    if (!ctr_hard_link) {
++        gf_msg_trace(this->name, 0,
++                     "Hard link doesn't exist"
++                     " in the list");
++        /* Since the hard link is not present in the list
++         * we add it to the list */
++        ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, 0,
++                   CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED,
++                   "Failed adding hard link to the list");
++            goto out;
++        }
++        ret = 0;
++        goto out;
++    }
++
++    /* update the hard link */
++    gf_uuid_copy(ctr_hard_link->pgfid, pgfid);
++    GF_FREE(ctr_hard_link->base_name);
++    ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name);
++    if (ret < 0) {
++        gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED,
++               "Failed copying basename "
++               "to ctr_hard_link");
++        /* delete the corrupted entry */
++        __delete_hard_link_from_list(&ctr_hard_link);
++        ctr_hard_link = NULL;
++        goto out;
++    }
++
++    ret = gettimeofday(&current_time, NULL);
++    if (ret == -1) {
++        gf_log(this->name, GF_LOG_ERROR, "Failed to get current time");
++        ctr_hard_link->hardlink_heal_period = 0;
++    } else {
++        ctr_hard_link->hardlink_heal_period = current_time.tv_sec;
++    }
++
++    ret = 0;
++
++out:
++    UNLOCK(&ctr_xlator_ctx->lock);
++
++    return ret;
++}
++
++/* Delete all hardlinks */
++static int
++ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx)
++{
++    int ret = -1;
++    ctr_hard_link_t *ctr_hard_link = NULL;
++    ctr_hard_link_t *tmp = NULL;
++
++    GF_ASSERT(ctr_xlator_ctx);
++
++    LOCK(&ctr_xlator_ctx->lock);
++
++    list_for_each_entry_safe(ctr_hard_link, tmp, &ctr_xlator_ctx->hardlink_list,
++                             list)
++    {
++        /* Remove hard link from list */
++        __delete_hard_link_from_list(&ctr_hard_link);
++        ctr_hard_link = NULL;
++    }
++
++    UNLOCK(&ctr_xlator_ctx->lock);
++
++    ret = 0;
++
++    return ret;
++}
++
++/* Please lock the inode before using this function */
++static ctr_xlator_ctx_t *
++__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++    int ret = 0;
++    uint64_t _addr = 0;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++
++    ret = __inode_ctx_get(inode, this, &_addr);
++    if (ret < 0)
++        _addr = 0;
++    if (_addr != 0) {
++        ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr;
++    }
++
++    return ctr_xlator_ctx;
++}
++
++ctr_xlator_ctx_t *
++init_ctr_xlator_ctx(xlator_t *this, inode_t *inode)
++{
++    int ret = -1;
++    uint64_t _addr = 0;
++    ctr_xlator_ctx_t *ctr_xlator_ctx = NULL;
++    struct timeval current_time = {0};
++
++    GF_ASSERT(this);
++    GF_ASSERT(inode);
++
++    LOCK(&inode->lock);
++    {
++        ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode);
++        if (ctr_xlator_ctx) {
++            ret = 0;
++            goto out;
++        }
++        ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx),
++                                   gf_ctr_mt_xlator_ctx);
++        if (!ctr_xlator_ctx)
++            goto out;
++
++        ret = LOCK_INIT(&ctr_xlator_ctx->lock);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED,
++                   "Failed init lock %s", strerror(ret));
lock %s", strerror(ret)); ++ goto out; ++ } ++ _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx; ++ ++ ret = __inode_ctx_set(inode, this, &_addr); ++ if (ret) { ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list); ++ ++ ret = gettimeofday(¤t_time, NULL); ++ if (ret == -1) { ++ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time"); ++ goto out; ++ } ++ ++ ctr_xlator_ctx->inode_heal_period = current_time.tv_sec; ++ } ++ ret = 0; ++out: ++ if (ret) { ++ GF_FREE(ctr_xlator_ctx); ++ ctr_xlator_ctx = NULL; ++ } ++ ++ UNLOCK(&inode->lock); ++ ++ return ctr_xlator_ctx; ++} ++ ++void ++fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode) ++{ ++ int ret = 0; ++ uint64_t _addr = 0; ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ ++ inode_ctx_del(inode, this, &_addr); ++ if (!_addr) ++ return; ++ ++ ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr; ++ ++ ret = ctr_delete_all_hard_link(this, ctr_xlator_ctx); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED, ++ "Failed deleting all " ++ "hard links from inode context"); ++ } ++ ++ LOCK_DESTROY(&ctr_xlator_ctx->lock); ++ ++ GF_FREE(ctr_xlator_ctx); ++} ++ ++ctr_xlator_ctx_t * ++get_ctr_xlator_ctx(xlator_t *this, inode_t *inode) ++{ ++ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; ++ ++ LOCK(&inode->lock); ++ ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode); ++ UNLOCK(&inode->lock); ++ ++ return ctr_xlator_ctx; ++} +diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h +new file mode 100644 +index 0000000..4e3bf7e +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h +@@ -0,0 +1,68 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __CTR_XLATOR_CTX_H ++#define __CTR_XLATOR_CTX_H ++ ++#include <glusterfs/xlator.h> ++#include "ctr_mem_types.h" ++#include <glusterfs/iatt.h> ++#include <glusterfs/glusterfs.h> ++#include <glusterfs/xlator.h> ++#include <glusterfs/logging.h> ++#include <glusterfs/locking.h> ++#include <glusterfs/common-utils.h> ++#include <time.h> ++#include <sys/time.h> ++ ++typedef struct ctr_hard_link { ++ uuid_t pgfid; ++ char *base_name; ++ /* Hardlink expiry : Defines the expiry period after which a ++ * database heal is attempted. 
*/ ++ uint64_t hardlink_heal_period; ++ struct list_head list; ++} ctr_hard_link_t; ++ ++typedef struct ctr_xlator_ctx { ++ /* This represents the looked up hardlinks ++ * NOTE: This doesn't represent all physical hardlinks of the inode*/ ++ struct list_head hardlink_list; ++ uint64_t inode_heal_period; ++ gf_lock_t lock; ++} ctr_xlator_ctx_t; ++ ++ctr_hard_link_t * ++ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name); ++ ++int ++ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name); ++ ++int ++ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name); ++ ++int ++ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, ++ uuid_t pgfid, const char *base_name, uuid_t old_pgfid, ++ const char *old_base_name); ++ ++ctr_xlator_ctx_t * ++get_ctr_xlator_ctx(xlator_t *this, inode_t *inode); ++ ++ctr_xlator_ctx_t * ++init_ctr_xlator_ctx(xlator_t *this, inode_t *inode); ++ ++void ++fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode); ++ ++#endif +diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h +new file mode 100644 +index 0000000..7b8f531 +--- /dev/null ++++ b/xlators/features/changetimerecorder/src/ctr_mem_types.h +@@ -0,0 +1,22 @@ ++/* ++ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __CTR_MEM_TYPES_H__ ++#define __CTR_MEM_TYPES_H__ ++ ++#include "gfdb_mem-types.h" ++ ++enum gf_ctr_mem_types_ { ++ gf_ctr_mt_private_t = gfdb_mt_end + 1, ++ gf_ctr_mt_xlator_ctx, ++ gf_ctr_mt_hard_link_t, ++ gf_ctr_mt_end ++}; ++#endif +-- +1.8.3.1 + diff --git a/SOURCES/0086-Revert-tiering-remove-the-translator-from-build-and-.patch b/SOURCES/0086-Revert-tiering-remove-the-translator-from-build-and-.patch new file mode 100644 index 0000000..b612ddf --- /dev/null +++ b/SOURCES/0086-Revert-tiering-remove-the-translator-from-build-and-.patch @@ -0,0 +1,3194 @@ +From 06adac5dbac7b2067232270cbee12931400f7824 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Sat, 6 Apr 2019 17:00:47 +0530 +Subject: [PATCH 086/124] Revert "tiering: remove the translator from build and + glusterd" + +This reverts commit 55a6ba56bea9ec0d3316c005300c514ea3ab0e54. +Add the test files and glusterd related changes. 
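For reference, a minimal sketch of how a fop handler might drive the ctr-xlator-ctx API restored by these patches. This is illustrative only: the helper name ctr_note_link and its arguments are invented here, error handling is trimmed, and the locking convention follows the "Please lock the ctr_xlator_ctx" comments in ctr-xlator-ctx.c above.

/* Illustrative sketch, not part of this patch set. */
#include "ctr-xlator-ctx.h"

static int
ctr_note_link(xlator_t *this, inode_t *inode, uuid_t pgfid, const char *bname)
{
    int ret = 0;
    /* Creates the per-inode context on first use, returns the
     * existing one afterwards. */
    ctr_xlator_ctx_t *ctx = init_ctr_xlator_ctx(this, inode);

    if (!ctx)
        return -1;

    /* ctr_search_hard_link_ctx()/ctr_add_hard_link() expect the
     * caller to hold ctx->lock. */
    LOCK(&ctx->lock);
    {
        if (!ctr_search_hard_link_ctx(this, ctx, pgfid, bname))
            ret = ctr_add_hard_link(this, ctx, pgfid, bname);
    }
    UNLOCK(&ctx->lock);

    return ret;
}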
+ +Label: DOWNSTREAM ONLY + +Change-Id: Ib704b7142a82cb1e94538a48af916730992a5701 +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166246 +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-by: Nithya Balachandran <nbalacha@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + MAINTAINERS | 18 + + tests/basic/afr/granular-esh/cli.t | 30 +- + ...1214222-directories_missing_after_attach_tier.t | 61 ++ + ...60185-donot-allow-detach-commit-unnecessarily.t | 47 ++ + tests/basic/tier/ctr-rename-overwrite.t | 50 ++ + tests/basic/tier/file_lock.c | 72 ++ + tests/basic/tier/file_with_spaces.t | 71 ++ + tests/basic/tier/fops-during-migration-pause.t | 89 +++ + tests/basic/tier/fops-during-migration.t | 105 +++ + tests/basic/tier/frequency-counters.t | 82 +++ + tests/basic/tier/legacy-many.t | 92 +++ + tests/basic/tier/locked_file_migration.t | 80 +++ + tests/basic/tier/new-tier-cmds.t | 129 ++++ + tests/basic/tier/readdir-during-migration.t | 65 ++ + tests/basic/tier/record-metadata-heat.t | 106 +++ + tests/basic/tier/tier-heald.t | 98 +++ + tests/basic/tier/tier-snapshot.t | 47 ++ + tests/basic/tier/tier.t | 219 +++++++ + tests/basic/tier/tier_lookup_heal.t | 69 ++ + tests/basic/tier/tierd_check.t | 128 ++++ + tests/basic/tier/unlink-during-migration.t | 92 +++ + ...03028-Rebalance-glusterd-rpc-connection-issue.t | 78 +++ + tests/bugs/quota/bug-1288474.t | 51 ++ + .../bug-1290965-detect-bitrotten-objects.t | 53 ++ + .../tier/bug-1205545-CTR-and-trash-integration.t | 72 ++ + tests/bugs/tier/bug-1279376-rename-demoted-file.t | 93 +++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 75 +++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 723 +++++++++++++++++++++ + 28 files changed, 2894 insertions(+), 1 deletion(-) + create mode 100755 tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t + create mode 100644 tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t + create mode 100755 tests/basic/tier/ctr-rename-overwrite.t + create mode 100644 tests/basic/tier/file_lock.c + create mode 100755 tests/basic/tier/file_with_spaces.t + create mode 100755 tests/basic/tier/fops-during-migration-pause.t + create mode 100755 tests/basic/tier/fops-during-migration.t + create mode 100644 tests/basic/tier/frequency-counters.t + create mode 100644 tests/basic/tier/legacy-many.t + create mode 100755 tests/basic/tier/locked_file_migration.t + create mode 100644 tests/basic/tier/new-tier-cmds.t + create mode 100644 tests/basic/tier/readdir-during-migration.t + create mode 100755 tests/basic/tier/record-metadata-heat.t + create mode 100644 tests/basic/tier/tier-heald.t + create mode 100644 tests/basic/tier/tier-snapshot.t + create mode 100755 tests/basic/tier/tier.t + create mode 100755 tests/basic/tier/tier_lookup_heal.t + create mode 100644 tests/basic/tier/tierd_check.t + create mode 100755 tests/basic/tier/unlink-during-migration.t + create mode 100644 tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t + create mode 100755 tests/bugs/quota/bug-1288474.t + create mode 100644 tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t + create mode 100644 tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t + create mode 100755 tests/bugs/tier/bug-1279376-rename-demoted-file.t + +diff --git a/MAINTAINERS b/MAINTAINERS +index b1fc0ee..1f4c93a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -103,6 +103,12 @@ P: 
Kotresh HR <khiremat@redhat.com> + S: Maintained + F: xlators/features/changelog/ + ++Changetimerecorder ++M: Shyamsundar Ranganathan <srangana@redhat.com> ++P: Hari Gowtham <hgowtham@redhat.com> ++S: Maintained ++F: xlators/features/changetimerecorder/ ++ + Decompounder + M: Krutika Dhananjay <kdhananj@redhat.com> + P: Pranith Karampuri <pkarampu@redhat.com> +@@ -248,6 +254,12 @@ P: Xavier Hernandez <xhernandez@redhat.com> + S: Maintained + F: xlators/features/shard/ + ++Tiering ++M: Shyamsundar Ranganathan <srangana@redhat.com> ++P: Hari Gowtham <hgowtham@redhat.com> ++S: Maintained ++F: xlators/cluster/dht/src/tier.c ++ + Trash + M: Anoop C S <anoopcs@redhat.com> + M: Jiffin Tony Thottan <jthottan@redhat.com> +@@ -327,6 +339,12 @@ P: Soumya Koduri <skoduri@redhat.com> + S: Maintained + F: api/ + ++libgfdb ++M: Shyamsundar Ranganathan <srangana@redhat.com> ++P: Hari Gowtham <hgowtham@redhat.com> ++S: Maintained ++F: libglusterfs/src/gfdb/ ++ + libglusterfs + M: Amar Tumballi <amarts@redhat.com> + M: Jeff Darcy <jeff@pl.atyp.us> +diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t +index 10b6c63..995d93e 100644 +--- a/tests/basic/afr/granular-esh/cli.t ++++ b/tests/basic/afr/granular-esh/cli.t +@@ -11,7 +11,7 @@ TESTS_EXPECTED_IN_LOOP=4 + TEST glusterd + TEST pidof glusterd + +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} + # Test that enabling the option should work on a newly created volume + TEST $CLI volume set $V0 cluster.granular-entry-heal on + TEST $CLI volume set $V0 cluster.granular-entry-heal off +@@ -25,6 +25,34 @@ TEST $CLI volume start $V1 + TEST ! $CLI volume heal $V1 granular-entry-heal enable + TEST ! $CLI volume heal $V1 granular-entry-heal disable + ++####################### ++###### TIER TEST ###### ++####################### ++# Execute the same command on a disperse + replicate tiered volume and make ++# sure the option is set on the replicate leg of the volume ++TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4} ++TEST $CLI volume heal $V1 granular-entry-heal enable ++EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume heal $V1 granular-entry-heal disable ++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++ ++# Kill a disperse brick and make heal be pending on the volume. ++TEST kill_brick $V1 $H0 $B0/${V1}0 ++ ++# Now make sure that one offline brick in disperse does not affect enabling the ++# option on the volume. ++TEST $CLI volume heal $V1 granular-entry-heal enable ++EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume heal $V1 granular-entry-heal disable ++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++ ++# Now kill a replicate brick. ++TEST kill_brick $V1 $H0 $B0/${V1}3 ++# Now make sure that one offline brick in replicate causes the command to be ++# failed. ++TEST ! $CLI volume heal $V1 granular-entry-heal enable ++EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++ + ###################### + ### REPLICATE TEST ### + ###################### +diff --git a/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t b/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t +new file mode 100755 +index 0000000..f9166d7 +--- /dev/null ++++ b/tests/basic/tier/bug-1214222-directories_missing_after_attach_tier.t +@@ -0,0 +1,61 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++LAST_BRICK=3 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=5 ++ ++ ++LAST_BRICK=1 ++CACHE_BRICK=2 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=5 ++MIGRATION_TIMEOUT=10 ++cleanup ++ ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Basic operations. ++cd $M0 ++TEST stat . ++TEST mkdir d1 ++TEST [ -d d1 ] ++TEST touch file1 ++TEST [ -e file1 ] ++ ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++TEST $CLI volume set $V0 features.ctr-enabled on ++ ++#check whether the directory's and files are present on mount or not. ++TEST [ -d d1 ] ++TEST [ -e file1 ] ++ ++cd ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0; ++ ++tier_status () ++{ ++ $CLI volume tier $V0 detach status | grep progress | wc -l ++} ++ ++TEST $CLI volume tier $V0 detach start ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_status ++TEST $CLI volume tier $V0 detach commit ++ ++EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST} ++ ++EXPECT_WITHIN $REBALANCE_TIMEOUT "0" confirm_vol_stopped $V0 ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t b/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t +new file mode 100644 +index 0000000..6efbe32 +--- /dev/null ++++ b/tests/basic/tier/bug-1260185-donot-allow-detach-commit-unnecessarily.t +@@ -0,0 +1,47 @@ ++#!/bin/bash ++ ++## Test case for BZ: 1260185 ++## Do not allow detach-tier commit without "force" option or without ++## user have not started "detach-tier start" operation ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../cluster.rc ++. $(dirname $0)/../../tier.rc ++ ++cleanup; ++ ++## Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++ ++## Lets create and start the volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2} ++TEST $CLI volume start $V0 ++ ++## Perform attach-tier operation on volume $V0 ++TEST $CLI volume tier $V0 attach $H0:$B0/${V0}{3..4} ++ ++## detach-tier commit operation without force option on volume $V0 ++## should not succeed ++TEST ! $CLI --mode=script volume tier $V0 detach commit ++ ++## detach-tier commit operation with force option on volume $V0 ++## should succeed ++TEST $CLI volume tier $V0 detach force ++ ++sleep 3 ++ ++## Again performing attach-tier operation on volume $V0 ++TEST $CLI volume tier $V0 attach $H0:$B0/${V0}{5..6} ++ ++## Do detach-tier start on volume $V0 ++TEST $CLI volume tier $V0 detach start ++ ++## Now detach-tier commit on volume $V0 should succeed. ++## wait for the detach to complete ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_commit_for_single_node ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1517961 +diff --git a/tests/basic/tier/ctr-rename-overwrite.t b/tests/basic/tier/ctr-rename-overwrite.t +new file mode 100755 +index 0000000..73ee758 +--- /dev/null ++++ b/tests/basic/tier/ctr-rename-overwrite.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../tier.rc ++ ++LAST_BRICK=1 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++ ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++ ++cleanup ++ ++# Start glusterd ++TEST glusterd ++TEST pidof glusterd ++ ++# Set-up tier cluster ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ ++# Start and mount the volume after enabling CTR ++TEST $CLI volume set $V0 features.ctr-enabled on ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# create two files ++echo "hello world" > $M0/file1 ++echo "hello world" > $M0/file2 ++ ++# db in hot brick shows 4 record. 2 for file1 and 2 for file2 ++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \ ++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l ) ++TEST [ $ENTRY_COUNT -eq 4 ] ++ ++#overwrite file2 with file1 ++mv -f $M0/file1 $M0/file2 ++ ++# Now the db in hot tier should have only 2 records for file1. ++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \ ++ sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l ) ++TEST [ $ENTRY_COUNT -eq 2 ] ++ ++cleanup ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/file_lock.c b/tests/basic/tier/file_lock.c +new file mode 100644 +index 0000000..20fdbc0 +--- /dev/null ++++ b/tests/basic/tier/file_lock.c +@@ -0,0 +1,72 @@ ++#include <stdio.h> ++#include <stdlib.h> ++#include <unistd.h> ++#include <fcntl.h> ++ ++void ++usage(void) ++{ ++ printf("Usage: testlock <filepath> [R|W]\n"); ++ return; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ char *file_path = NULL; ++ int fd = -1; ++ struct flock lock = {0}; ++ int ret = -1; ++ int c = 0; ++ ++ if (argc != 3) { ++ usage(); ++ exit(1); ++ } ++ ++ file_path = argv[1]; ++ fd = open(file_path, O_RDWR); ++ ++ if (-1 == fd) { ++ printf("Failed to open file %s. %m\n", file_path); ++ exit(1); ++ } ++ ++ /* TODO: Check for invalid input*/ ++ ++ if (!strcmp(argv[2], "W")) { ++ lock.l_type = F_WRLCK; ++ printf("Taking write lock\n"); ++ ++ } else { ++ lock.l_type = F_RDLCK; ++ printf("Taking read lock\n"); ++ } ++ ++ lock.l_whence = SEEK_SET; ++ lock.l_start = 0; ++ lock.l_len = 0; ++ lock.l_pid = getpid(); ++ ++ printf("Acquiring lock on %s\n", file_path); ++ ret = fcntl(fd, F_SETLK, &lock); ++ if (ret) { ++ printf("Failed to acquire lock on %s (%m)\n", file_path); ++ close(fd); ++ exit(1); ++ } ++ ++ sleep(10); ++ ++ /*Unlock*/ ++ ++ printf("Releasing lock on %s\n", file_path); ++ lock.l_type = F_UNLCK; ++ ret = fcntl(fd, F_SETLK, &lock); ++ if (ret) { ++ printf("Failed to release lock on %s (%m)\n", file_path); ++ } ++ ++ close(fd); ++ return ret; ++} +diff --git a/tests/basic/tier/file_with_spaces.t b/tests/basic/tier/file_with_spaces.t +new file mode 100755 +index 0000000..919b900 +--- /dev/null ++++ b/tests/basic/tier/file_with_spaces.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++DEMOTE_TIMEOUT=10 ++PROMOTE_FREQ=5 ++ ++FILE_SPACE="Testing filenames with spaces.log" ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
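The sqlite3 pipelines in ctr-rename-overwrite.t above are how these tests peek at the CTR database on a brick. A factored-out form (a hypothetical helper, not part of tier.rc as far as these patches show) makes the row-count assertions easier to reuse:

# Count the rows CTR keeps for a brick in gf_file_tb and gf_flink_tb.
ctr_entry_count () {
    local brick_dir=$1    # e.g. $B0/${V0}5
    local db_name=$2      # e.g. ${V0}5
    echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
        sqlite3 "$brick_dir/.glusterfs/$db_name.db" | wc -l
}

# Usage, matching the assertion above:
#   TEST [ $(ctr_entry_count $B0/${V0}5 ${V0}5) -eq 4 ]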
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++ ++# The file will be created on the hot tier ++ ++touch "$M0/$FILE_SPACE" ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name "$FILE_SPACE"` ++echo "File path on hot tier: "$HPATH ++ ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_TIMEOUT ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name "$FILE_SPACE"` ++echo "File path on cold tier: "$CPATH ++ ++EXPECT "yes" exists_and_regular_file $CPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/fops-during-migration-pause.t b/tests/basic/tier/fops-during-migration-pause.t +new file mode 100755 +index 0000000..46fc6e4 +--- /dev/null ++++ b/tests/basic/tier/fops-during-migration-pause.t +@@ -0,0 +1,89 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=10 ++PROMOTE_FREQ=10 ++ ++TEST_STR="Testing write and truncate fops on tier migration" ++ ++function is_sticky_set () { ++ echo $1 ++ if [ -k $1 ]; ++ then ++ echo "yes" ++ else ++ echo "no" ++ fi ++} ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
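file_with_spaces.t above locates a file by running find against each tier's brick directories. The same technique, condensed into a hypothetical predicate (the tests inline this instead of defining it):

# Report which tier currently holds a file, by name.
tier_of_file () {
    if [ -n "$(find $B0/hot/ -name "$1" 2>/dev/null)" ]; then
        echo "hot"
    elif [ -n "$(find $B0/cold/ -name "$1" 2>/dev/null)" ]; then
        echo "cold"
    else
        echo "missing"
    fi
}

# e.g. EXPECT "cold" tier_of_file "$FILE_SPACE"   # after the demote window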
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++TEST mkdir $M0/dir1 ++ ++# Create a large file (800MB), so that rebalance takes time ++# The file will be created on the hot tier ++sleep_until_mid_cycle $DEMOTE_FREQ ++dd if=/dev/zero of=$M0/dir1/FILE1 bs=256k count=5120 ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name FILE1` ++echo "File path on hot tier: "$HPATH ++ ++ ++# Wait for the tier process to demote the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH ++ ++TEST $CLI volume set $V0 cluster.tier-pause on ++ ++# Wait for the tier process to finish migrating the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $HPATH ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name FILE1` ++ ++# make sure destination is empty ++TEST ! test -s $CPATH ++ ++# make sure source exists and not empty ++TEST test -s $HPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/fops-during-migration.t b/tests/basic/tier/fops-during-migration.t +new file mode 100755 +index 0000000..458c01e +--- /dev/null ++++ b/tests/basic/tier/fops-during-migration.t +@@ -0,0 +1,105 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++ ++TEST_STR="Testing write and truncate fops on tier migration" ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
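The pause test above keys off the sticky bit: DHT sets it on the migration source while a file is in flight, which is what is_sticky_set and the EXPECT_WITHIN calls poll for. Restated as a hypothetical standalone helper (REBALANCE_TIMEOUT comes from the test harness):

# Wait until DHT finishes (or abandons) migrating a file, i.e. until
# the sticky bit on the source copy is cleared.
wait_migration_end () {
    local path=$1 timeout=${2:-$REBALANCE_TIMEOUT}
    while [ "$timeout" -gt 0 ] && [ -k "$path" ]; do
        sleep 1
        timeout=$((timeout - 1))
    done
    [ ! -k "$path" ]    # success iff the sticky bit is gone
}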
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume set $V0 cluster.force-migration on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++# Checks that the contents of the file matches the input string ++#$1 : file_path ++#$2 : comparison string ++ ++function check_file_content () { ++ contents=`cat $1` ++ echo $contents ++ if [ "$contents" = "$2" ]; then ++ echo "1" ++ else ++ echo "0" ++ fi ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++$CLI volume set $V0 diagnostics.client-log-level DEBUG ++ ++TEST mkdir $M0/dir1 ++ ++# Create a large file (320MB), so that rebalance takes time ++# The file will be created on the hot tier ++ ++dd if=/dev/zero of=$M0/dir1/FILE1 bs=64k count=5120 ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name FILE1` ++echo "File path on hot tier: "$HPATH ++ ++ ++# Wait for the tier process to demote the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name FILE1` ++echo "File path on cold tier: "$CPATH ++ ++# Test setxattr ++TEST setfattr -n "user.test_xattr" -v "qwerty" $M0/dir1/FILE1 ++ ++# Change the file contents while it is being migrated ++echo $TEST_STR > $M0/dir1/FILE1 ++ ++# The file contents should have changed even if the file ++# is not done migrating ++EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR" ++ ++ ++# Wait for the tier process to finish migrating the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "no" is_sticky_set $CPATH ++ ++# The file contents should have changed ++EXPECT "1" check_file_content $M0/dir1/FILE1 "$TEST_STR" ++ ++ ++TEST getfattr -n "user.test_xattr" $M0/dir1/FILE1 ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/frequency-counters.t b/tests/basic/tier/frequency-counters.t +new file mode 100644 +index 0000000..08e05df +--- /dev/null ++++ b/tests/basic/tier/frequency-counters.t +@@ -0,0 +1,82 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=10 ++PROMOTE_FREQ=10 ++NUM_FILES=5 ++TEST_DIR=test ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
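The frequency-counters test that begins above exercises the promotion counters: with the read/write-freq-threshold options it sets below, a single access per cycle must not promote a file, while two accesses must. Its double-hit pattern, as a hypothetical helper (drop_cache comes from the test harness and is used the same way in the script):

# Touch a file twice in one cycle so it crosses a freq-threshold of 2.
heat_file () {
    cat "$1" > /dev/null
    drop_cache $M0      # force the next read to reach the brick again
    cat "$1" > /dev/null
    drop_cache $M0
}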
++ ++function create_dist_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++} ++ ++function create_dist_tier_vol () { ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 features.record-counters on ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 2 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 2 ++} ++ ++cleanup; ++ ++ ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++# create some files ++mkdir $M0/$TEST_DIR ++cd $M0/${TEST_DIR} ++ ++date > file1 ++touch file2 ++ ++# attach tier ++create_dist_tier_vol $NUM_BRICKS ++ ++sleep_until_mid_cycle $PROMOTE_FREQ ++ ++# check if promotion on single hit, should fail ++date >> file2 ++cat file1 ++drop_cache $M0 ++sleep $PROMOTE_FREQ ++EXPECT "0" check_counters 0 0 ++ ++# check if promotion on double hit, should suceed ++sleep_until_mid_cycle $PROMOTE_FREQ ++date >> file2 ++drop_cache $M0 ++cat file1 ++date >> file2 ++drop_cache $M0 ++cat file1 ++ ++EXPECT_WITHIN $PROMOTE_FREQ "0" check_counters 2 0 ++ ++TEST ! $CLI volume set $V0 features.record-counters off ++ ++cd / ++ ++cleanup ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t +new file mode 100644 +index 0000000..5795428 +--- /dev/null ++++ b/tests/basic/tier/legacy-many.t +@@ -0,0 +1,92 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../tier.rc ++ ++ ++LAST_BRICK=3 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=12 ++MIGRATION_TIMEOUT=10 ++DEMOTE_FREQ=60 ++PROMOTE_FREQ=10 ++TEST_DIR="test_files" ++NUM_FILES=15 ++ ++function read_all { ++ for file in * ++ do ++ cat $file ++ done ++} ++ ++function tier_status () { ++ $CLI volume tier $V0 status | grep "success" | wc -l ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++# Create distributed replica volume ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++ ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 features.ctr-enabled on ++ ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create a number of "legacy" files before attaching tier ++mkdir $M0/${TEST_DIR} ++cd $M0/${TEST_DIR} ++TEST create_many_files file $NUM_FILES ++wait ++ ++# Attach tier ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume set $V0 cluster.tier-mode test ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ ++# wait a little for lookup heal to finish ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status ++ ++# make sure fix layout completed ++CPATH=$B0/${V0}0 ++echo $CPATH > /tmp/out ++TEST getfattr -n "trusted.tier.fix.layout.complete" $CPATH ++ ++# Read "legacy" files ++drop_cache $M0 ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++ ++TEST read_all ++ ++# Test to make sure files were promoted as expected ++sleep $PROMOTE_TIMEOUT ++EXPECT_WITHIN $PROMOTE_TIMEOUT "0" check_counters $NUM_FILES 0 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}" ++ ++TEST $CLI volume tier $V0 detach commit ++ ++# fix layout flag should be cleared ++TEST ! getfattr -n "trusted.tier.fix.layout.complete" $CPATH ++ ++cd; ++cleanup ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/locked_file_migration.t b/tests/basic/tier/locked_file_migration.t +new file mode 100755 +index 0000000..7fb1717 +--- /dev/null ++++ b/tests/basic/tier/locked_file_migration.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=7 ++PROMOTE_FREQ=30 ++DEMOTE_TIMEOUT=15 ++ ++TEST_STR="Testing write and truncate fops on tier migration" ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
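legacy-many.t above asserts the trusted.tier.fix.layout.complete xattr that the tier process stamps on a brick root once fix-layout has finished, and that a detach commit removes it again. As a hypothetical predicate in the same style:

# True once tier fix-layout has completed on the given brick root.
fix_layout_done () {
    getfattr -n "trusted.tier.fix.layout.complete" "$1" > /dev/null 2>&1
}

# e.g. TEST fix_layout_done $B0/${V0}0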
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ ++#We don't want promotes to happen in this test ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 10 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 10 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++# Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++TEST mkdir $M0/dir1 ++build_tester $(dirname $0)/file_lock.c -o file_lock ++cp $(dirname $0)/file_lock $M0/file_lock ++ ++# The files will be created on the hot tier ++touch $M0/dir1/FILE1 ++touch $M0/dir1/FILE2 ++ ++# For FILE1, take a POSIX write lock on the entire file. ++# Don't take a lock on FILE2 ++ ++./file_lock $M0/dir1/FILE1 W & ++ ++sleep $DEMOTE_FREQ ++ ++# Wait for the tier process to demote the file ++# Only FILE2 and file_lock should be demoted ++# FILE1 should be skipped because of the lock held ++# on it ++ ++EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 0 2 ++ ++sleep 10 ++ ++rm $(dirname $0)/file_lock ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t +new file mode 100644 +index 0000000..b9c9390 +--- /dev/null ++++ b/tests/basic/tier/new-tier-cmds.t +@@ -0,0 +1,129 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++. $(dirname $0)/../../cluster.rc ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. ++ ++function check_peers { ++ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ ++function create_dist_tier_vol () { ++ TEST $CLI_1 volume create $V0 disperse 6 redundancy 2 $H1:$B1/${V0}_b1 $H2:$B2/${V0}_b2 $H3:$B3/${V0}_b3 $H1:$B1/${V0}_b4 $H2:$B2/${V0}_b5 $H3:$B3/${V0}_b6 ++ TEST $CLI_1 volume start $V0 ++ TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6 ++} ++ ++function tier_daemon_status { ++ local _VAR=CLI_$1 ++ local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status' ++ ${!_VAR} --xml volume status $V0 \ ++ | xmllint --xpath "$xpath_sel" - \ ++ | sed -n '/.*<status>\([0-9]*\).*/s//\1/p' ++} ++ ++function detach_xml_status { ++ $CLI_1 volume tier $V0 detach status --xml | sed -n \ ++ '/.*<opErrstr>Detach tier status successful/p' | wc -l ++} ++ ++cleanup; ++ ++#setup cluster and test volume ++TEST launch_cluster 3; # start 3-node virtual cluster ++TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli ++TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++ ++#Create and start a tiered volume ++create_dist_tier_vol ++ ++########### check failure for older commands ############# ++ ++TEST ! 
$CLI_1 volume rebalance $V0 tier status ++ ++# failure for older command can be removed in 3.11 ++ ++########################################################## ++ ++#Issue detach tier on the tiered volume ++#Will throw error saying detach tier not started ++ ++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status ++ ++EXPECT "0" detach_xml_status ++ ++#kill a node ++TEST kill_node 2 ++ ++#check if we have the rest of the node available printed in the output of detach status ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status_node_down ++ ++TEST $glusterd_2; ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++ ++#after starting detach tier the detach tier status should display the status ++sleep 2 ++$CLI_1 volume status ++TEST $CLI_1 volume tier $V0 detach start ++ ++EXPECT "1" detach_xml_status ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status ++ ++#kill a node ++TEST kill_node 2 ++ ++#check if we have the rest of the node available printed in the output of detach status ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status_node_down ++ ++TEST $glusterd_2; ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++# Make sure we check that the *bricks* are up and not just the node. >:-( ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2 ++ ++# Parsing normal output doesn't work because of line-wrap issues on our ++# regression machines, and the version of xmllint there doesn't support --xpath ++# so we can't do it that way either. In short, there's no way for us to detect ++# when we can stop waiting, so we just have to wait the maximum time every time ++# and hope any failures will show up later in the script. ++sleep $PROCESS_UP_TIMEOUT ++#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status ++ ++TEST $CLI_1 volume tier $V0 detach stop ++ ++#If detach tier is stopped the detach tier command will fail ++ ++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status ++ ++TEST $CLI_1 volume tier $V0 detach start ++ ++#wait for the detach to complete ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_commit ++ ++#If detach tier is committed then the detach status should fail throwing an error ++#saying its not a tiered volume ++ ++EXPECT "Tier command failed" $CLI_1 volume tier $V0 detach status ++ ++########### check failure for older commands ############# ++ ++TEST ! $CLI_1 volume rebalance $V0 tier start ++ ++# failure for older command can be removed in 3.11 ++ ++########################################################## ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/readdir-during-migration.t b/tests/basic/tier/readdir-during-migration.t +new file mode 100644 +index 0000000..292ca88 +--- /dev/null ++++ b/tests/basic/tier/readdir-during-migration.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++NUM_FILES=30 ++TEST_DIR=test ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++} ++ ++function check_file_count() { ++ if [ $(ls -1 | wc -l) == $1 ]; then ++ echo "1" ++ else ++ echo "0" ++ fi ++} ++ ++cleanup; ++ ++ ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++# Create a number of "legacy" files before attaching tier ++mkdir $M0/${TEST_DIR} ++cd $M0/${TEST_DIR} ++TEST create_many_files tfile $NUM_FILES ++ ++EXPECT "1" check_file_count $NUM_FILES ++ ++sleep $DEMOTE_FREQ ++ ++EXPECT "1" check_file_count $NUM_FILES ++ ++cd / ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/record-metadata-heat.t b/tests/basic/tier/record-metadata-heat.t +new file mode 100755 +index 0000000..f6f35a8 +--- /dev/null ++++ b/tests/basic/tier/record-metadata-heat.t +@@ -0,0 +1,106 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=3 ++DEMOTE_FREQ=5 ++DEMOTE_TIMEOUT=10 ++PROMOTE_FREQ=5 ++ ++FILE="file1.txt" ++FILE_LINK="file2.txt" ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. ++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 4 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 4 ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++ ++# The file will be created on the hot tier ++touch "$M0/$FILE" ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name "$FILE"` ++echo "File path on hot tier: "$HPATH ++ ++############################################ ++# as per the changes on b8b050c3 ++# To test the xttr set by EC ++TEST ! 
getfattr -n "trusted.ec.size" $HPATH ++############################################ ++ ++# Expecting the file to be on the hot tier ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++ ++# Try to heat the file using 5 metadata operations ++# WITHOUT setting ctr-record-metadata-heat on ++touch "$M0/$FILE" ++chmod +x "$M0/$FILE" ++chown root "$M0/$FILE" ++ln "$M0/$FILE" "$M0/$FILE_LINK" ++rm -rf "$M0/$FILE_LINK" ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_TIMEOUT ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name "$FILE"` ++echo "File path on cold tier: "$CPATH ++ ++# Expecting the file to be on cold tier ++EXPECT "yes" exists_and_regular_file $CPATH ++ ++#Set ctr-record-metadata-heat on ++TEST $CLI volume set $V0 ctr-record-metadata-heat on ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++ ++# Heating the file using 5 metadata operations ++touch "$M0/$FILE" ++chmod +x "$M0/$FILE" ++chown root "$M0/$FILE" ++ln "$M0/$FILE" "$M0/$FILE_LINK" ++rm -rf "$M0/$FILE_LINK" ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_TIMEOUT ++ ++# Get the path of the file on the hot tier ++echo "File path on hot tier: "$HPATH ++ ++# Expecting the file to be on the hot tier ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t +new file mode 100644 +index 0000000..a8e634f +--- /dev/null ++++ b/tests/basic/tier/tier-heald.t +@@ -0,0 +1,98 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# This test contains volume heal commands handled by glusterd. ++# Covers enable/disable at the moment. Will be enhanced later to include ++# the other commands as well. ++ ++cleanup; ++TEST glusterd ++TEST pidof glusterd ++ ++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" ++ ++# Commands should fail when both tiers are not of distribute type. ++# Glustershd shouldn't be running as long as there are no replicate/disperse ++# volumes ++TEST $CLI volume create dist_tier $H0:$B0/cold ++TEST $CLI volume start dist_tier ++TEST $CLI volume tier dist_tier attach $H0:$B0/hot ++ ++TEST "[ -z $(get_shd_process_pid)]" ++TEST ! $CLI volume heal dist_tier enable ++TEST ! $CLI volume heal dist_tier disable ++ ++# Commands should work on replicate/disperse volume. ++TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1 ++TEST "[ -z $(get_shd_process_pid)]" ++TEST $CLI volume start r2 ++ ++TEST $CLI volume tier r2 attach $H0:$B0/r2_hot ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal r2 enable ++EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" ++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal r2 disable ++EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" ++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++# Commands should work on disperse volume. 
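record-metadata-heat.t above runs the same five metadata-only operations twice: once with ctr-record-metadata-heat off, after which the file stays demoted, and once with it on, after which the file is promoted. A hypothetical wrapper for that sequence (the .heatlink name is invented for the example):

# Five metadata-only operations: no file data changes, so they count
# as heat only when ctr-record-metadata-heat is enabled.
metadata_heat () {
    local f=$1 link=$1.heatlink
    touch "$f"
    chmod +x "$f"
    chown root "$f"
    ln "$f" "$link" && rm -f "$link"
}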
++TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 ++TEST $CLI volume start ec2 ++ ++TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4} ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal ec2 enable ++EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" ++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++TEST $CLI volume heal ec2 disable ++EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" ++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++ ++#Check that shd graph is rewritten correctly on volume stop/start ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++TEST $CLI volume stop r2 ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++TEST $CLI volume stop ec2 ++# When both the volumes are stopped glustershd volfile is not modified just the ++# process is stopped ++TEST "[ -z $(get_shd_process_pid) ]" ++ ++TEST $CLI volume start r2 ++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++ ++TEST $CLI volume start ec2 ++ ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++ ++TEST $CLI volume tier ec2 detach force ++ ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++ ++TEST $CLI volume set r2 self-heal-daemon on ++TEST $CLI volume set r2 cluster.self-heal-daemon off ++TEST ! $CLI volume set ec2 self-heal-daemon off ++TEST ! $CLI volume set ec2 cluster.self-heal-daemon on ++TEST ! $CLI volume set dist self-heal-daemon off ++TEST ! $CLI volume set dist cluster.self-heal-daemon on ++ ++TEST $CLI volume set ec2 disperse-self-heal-daemon off ++TEST $CLI volume set ec2 cluster.disperse-self-heal-daemon on ++TEST ! $CLI volume set r2 disperse-self-heal-daemon on ++TEST ! $CLI volume set r2 cluster.disperse-self-heal-daemon off ++TEST ! $CLI volume set dist disperse-self-heal-daemon off ++TEST ! $CLI volume set dist cluster.disperse-self-heal-daemon on ++ ++cleanup ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/tier-snapshot.t b/tests/basic/tier/tier-snapshot.t +new file mode 100644 +index 0000000..8747c5d +--- /dev/null ++++ b/tests/basic/tier/tier-snapshot.t +@@ -0,0 +1,47 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++TEST init_n_bricks 4; ++TEST setup_lvm 4; ++ ++TEST glusterd; ++ ++TEST $CLI volume create $V0 replica 2 $H0:$L1 $H0:$L2 ; ++ ++TEST $CLI volume start $V0; ++ ++TEST $CLI volume tier $V0 attach replica 2 $H0:$L3 $H0:$L4 ; ++ ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0; ++ ++for i in {1..10} ; do echo "file" > $M0/file$i ; done ++ ++TEST $CLI snapshot config activate-on-create enable ++ ++TEST $CLI snapshot create snap1 $V0 no-timestamp; ++ ++for i in {11..20} ; do echo "file" > $M0/file$i ; done ++ ++TEST $CLI snapshot create snap2 $V0 no-timestamp; ++ ++mkdir $M0/dir1; ++mkdir $M0/dir2; ++ ++for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done ++for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done ++ ++TEST $CLI snapshot create snap3 $V0 no-timestamp; ++ ++for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done ++for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done ++ ++TEST $CLI snapshot create snap4 $V0 no-timestamp; ++ ++TEST $CLI snapshot delete all; ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t +new file mode 100755 +index 0000000..1798541 +--- /dev/null ++++ b/tests/basic/tier/tier.t +@@ -0,0 +1,219 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++LAST_BRICK=3 ++CACHE_BRICK_FIRST=4 ++CACHE_BRICK_LAST=5 ++DEMOTE_TIMEOUT=12 ++PROMOTE_TIMEOUT=5 ++MIGRATION_TIMEOUT=10 ++DEMOTE_FREQ=4 ++PROMOTE_FREQ=12 ++ ++function file_on_slow_tier { ++ found=0 ++ ++ for i in `seq 0 $LAST_BRICK`; do ++ test -e "$B0/${V0}${i}/$1" && found=1 && break; ++ done ++ ++ if [ "$found" == "1" ] ++ then ++ slow_hash1=$2 ++ slow_hash2=$(fingerprint "$B0/${V0}${i}/$1") ++ ++ if [ "$slow_hash1" == "$slow_hash2" ] ++ then ++ echo "0" ++ else ++ echo "2" ++ fi ++ else ++ echo "1" ++ fi ++ ++ # temporarily disable non-Linux tests. ++ case $OSTYPE in ++ NetBSD | FreeBSD | Darwin) ++ echo "0" ++ ;; ++ esac ++} ++ ++function file_on_fast_tier { ++ found=0 ++ ++ for j in `seq $CACHE_BRICK_FIRST $CACHE_BRICK_LAST`; do ++ test -e "$B0/${V0}${j}/$1" && found=1 && break; ++ done ++ ++ ++ if [ "$found" == "1" ] ++ then ++ fast_hash1=$2 ++ fast_hash2=$(fingerprint "$B0/${V0}${j}/$1") ++ ++ if [ "$fast_hash1" == "$fast_hash2" ] ++ then ++ echo "0" ++ else ++ echo "2" ++ fi ++ else ++ echo "1" ++ fi ++} ++ ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++# testing bug 1215122, ie should fail if replica count and bricks are not compatible. ++ ++TEST ! $CLI volume tier $V0 attach replica 5 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume start $V0 ++ ++# The following two commands instigate a graph switch. Do them ++# before attaching the tier. If done on a tiered volume the rebalance ++# daemon will terminate and must be restarted manually. ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++ ++#Not a tier volume ++TEST ! $CLI volume set $V0 cluster.tier-demote-frequency 4 ++ ++#testing bug #1228112, glusterd crashed when trying to detach-tier commit force on a non-tiered volume. ++TEST ! 
$CLI volume tier $V0 detach commit force ++ ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++ ++TEST $CLI volume set $V0 cluster.tier-mode test ++ ++# create a file, make sure it can be deleted after attach tier. ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++cd $M0 ++TEST touch delete_me.txt ++TEST rm -f delete_me.txt ++ ++# confirm watermark CLI works ++TEST $CLI volume set $V0 cluster.watermark-hi 85 ++TEST $CLI volume set $V0 cluster.watermark-low 75 ++TEST $CLI volume set $V0 cluster.tier-max-mb 1000 ++TEST $CLI volume set $V0 cluster.tier-max-files 1000 ++TEST $CLI volume set $V0 cluster.tier-max-promote-file-size 1000 ++TEST ! $CLI volume set $V0 cluster.tier-max-files -3 ++TEST ! $CLI volume set $V0 cluster.watermark-low 90 ++TEST ! $CLI volume set $V0 cluster.watermark-hi 75 ++TEST ! $CLI volume set $V0 cluster.read-freq-threshold -12 ++TEST ! $CLI volume set $V0 cluster.write-freq-threshold -12 ++ ++#check for watermark reset ++TEST $CLI volume set $V0 cluster.watermark-low 10 ++TEST $CLI volume set $V0 cluster.watermark-hi 30 ++TEST ! $CLI volume reset $V0 cluster.watermark-low ++TEST $CLI volume reset $V0 cluster.watermark-hi ++TEST $CLI volume reset $V0 cluster.watermark-low ++ ++# stop the volume and restart it. The rebalance daemon should restart. ++cd /tmp ++umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++ ++wait_for_tier_start ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++cd $M0 ++ ++sleep_first_cycle $DEMOTE_FREQ ++$CLI volume tier $V0 status ++ ++#Tier options expect non-negative value ++TEST ! $CLI volume set $V0 cluster.tier-promote-frequency -1 ++ ++#Tier options expect non-negative value ++TEST ! $CLI volume set $V0 cluster.read-freq-threshold qwerty ++ ++ ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ ++# Basic operations. ++TEST stat . ++TEST mkdir d1 ++TEST [ -d d1 ] ++TEST touch d1/file1 ++TEST mkdir d1/d2 ++TEST [ -d d1/d2 ] ++TEST find d1 ++mkdir /tmp/d1 ++ ++# Create a file. It should be on the fast tier. ++uuidgen > /tmp/d1/data.txt ++md5data=$(fingerprint /tmp/d1/data.txt) ++mv /tmp/d1/data.txt ./d1/data.txt ++ ++TEST file_on_fast_tier d1/data.txt $md5data ++ ++uuidgen > /tmp/d1/data2.txt ++md5data2=$(fingerprint /tmp/d1/data2.txt) ++cp /tmp/d1/data2.txt ./d1/data2.txt ++ ++#File with spaces and special characters. ++SPACE_FILE="file with spaces & $peci@l ch@r@cter$ @!@$%^$#@^^*&%$#$%.txt" ++ ++uuidgen > "/tmp/d1/$SPACE_FILE" ++md5space=$(fingerprint "/tmp/d1/$SPACE_FILE") ++mv "/tmp/d1/$SPACE_FILE" "./d1/$SPACE_FILE" ++ ++# Check auto-demotion on write new. ++sleep $DEMOTE_TIMEOUT ++ ++# Check auto-promotion on write append. 
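tier.t's file_on_slow_tier/file_on_fast_tier above treat a file as resident on a tier only if a brick copy exists there and its hash still matches the saved fingerprint, which filters out partially migrated or stale copies. The comparison, condensed into a hypothetical two-file form:

# True iff both paths exist and have identical content.
same_content () {
    [ -f "$1" ] && [ -f "$2" ] &&
        [ "$(md5sum < "$1")" = "$(md5sum < "$2")" ]
}

# e.g. same_content /tmp/d1/data.txt $B0/${V0}0/d1/data.txt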
++UUID=$(uuidgen) ++echo $UUID >> /tmp/d1/data2.txt ++md5data2=$(fingerprint /tmp/d1/data2.txt) ++ ++sleep_until_mid_cycle $DEMOTE_FREQ ++drop_cache $M0 ++ ++echo $UUID >> ./d1/data2.txt ++cat "./d1/$SPACE_FILE" ++ ++sleep $PROMOTE_TIMEOUT ++sleep $DEMOTE_FREQ ++EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 2 6 ++ ++# stop gluster, when it comes back info file should have tiered volume ++killall glusterd ++TEST glusterd ++ ++EXPECT "0" file_on_slow_tier d1/data.txt $md5data ++EXPECT "0" file_on_slow_tier d1/data2.txt $md5data2 ++EXPECT "0" file_on_slow_tier "./d1/$SPACE_FILE" $md5space ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" detach_start $V0 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}${CACHE_BRICK_FIRST}" ++ ++TEST $CLI volume tier $V0 detach commit ++ ++EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST} ++ ++confirm_vol_stopped $V0 ++ ++cd; ++ ++cleanup ++rm -rf /tmp/d1 ++ ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/tier_lookup_heal.t b/tests/basic/tier/tier_lookup_heal.t +new file mode 100755 +index 0000000..c7c7f27 +--- /dev/null ++++ b/tests/basic/tier/tier_lookup_heal.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++LAST_BRICK=1 ++CACHE_BRICK_FIRST=2 ++CACHE_BRICK_LAST=3 ++PROMOTE_TIMEOUT=5 ++ ++function file_on_fast_tier { ++ local ret="1" ++ ++ s1=$(md5sum $1) ++ s2=$(md5sum $B0/${V0}${CACHE_BRICK_FIRST}/$1) ++ ++ if [ -e $B0/${V0}${CACHE_BRICK_FIRST}/$1 ] && ! [ "$s1" == "$s2" ]; then ++ echo "0" ++ else ++ echo "1" ++ fi ++} ++ ++cleanup ++ ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files before CTR xlator is on. ++cd $M0 ++TEST stat . ++TEST touch file1 ++TEST stat file1 ++ ++#Attach tier and switch ON CTR Xlator. ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST ++TEST $CLI volume set $V0 features.ctr-enabled on ++TEST $CLI volume set $V0 cluster.tier-demote-frequency 4 ++TEST $CLI volume set $V0 cluster.tier-promote-frequency 4 ++TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 cluster.tier-mode test ++ ++#The lookup should heal the database. ++TEST ls file1 ++ ++# gf_file_tb and gf_flink_tb should NOT be empty ++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \ ++ sqlite3 $B0/${V0}$LAST_BRICK/.glusterfs/${V0}$LAST_BRICK.db | wc -l ) ++TEST [ $ENTRY_COUNT -eq 2 ] ++ ++# Heat-up the file ++uuidgen > file1 ++sleep 5 ++ ++#Check if the file is promoted ++EXPECT_WITHIN $PROMOTE_TIMEOUT "0" file_on_fast_tier file1 ++ ++cd; ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/basic/tier/tierd_check.t b/tests/basic/tier/tierd_check.t +new file mode 100644 +index 0000000..5701fa9 +--- /dev/null ++++ b/tests/basic/tier/tierd_check.t +@@ -0,0 +1,128 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++. $(dirname $0)/../../cluster.rc ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
++ ++function check_peers { ++ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ ++function create_dist_tier_vol () { ++ TEST $CLI_1 volume create $V0 $H1:$B1/${V0} $H2:$B2/${V0} ++ TEST $CLI_1 volume start $V0 ++ TEST $CLI_1 volume tier $V0 attach $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 ++} ++ ++function tier_status () { ++ #$CLI_1 volume tier $V0 status | grep progress | wc -l ++ # I don't want to disable the entire test, but this part of it seems ++ # highly suspect. *Why* do we always expect the number of lines to be ++ # exactly two? What would it mean for it to be otherwise? Are we ++ # checking *correctness* of the result, or merely its *consistency* ++ # with what was observed at some unspecified time in the past? Does ++ # this check only serve to inhibit actual improvements? Until someone ++ # can answer these questions and explain why a hard-coded "2" is less ++ # arbitrary than what was here before, we might as well disable this ++ # part of the test. ++ echo "2" ++} ++ ++function tier_daemon_kill () { ++pkill -f "tierd/$V0" ++echo "$?" ++} ++ ++cleanup; ++ ++#setup cluster and test volume ++TEST launch_cluster 3; # start 3-node virtual cluster ++TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli ++TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; ++ ++#Create and start a tiered volume ++create_dist_tier_vol ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_check ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_kill ++ ++TEST $CLI_1 volume tier $V0 start ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_kill ++ ++TEST $CLI_3 volume tier $V0 start force ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check ++ ++#The pattern progress should occur twice only. ++#it shouldn't come up on the third node without tierd even ++#after the tier start force is issued on the node without ++#tierd ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++#kill the node on which tier is not supposed to run ++TEST kill_node 3 ++ ++#bring the node back, it should not have tierd running on it ++TEST $glusterd_3; ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++#after volume restart, check for tierd ++ ++TEST $CLI_3 volume stop $V0 ++ ++TEST $CLI_3 volume start $V0 ++ ++wait_for_tier_start ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++#check for detach start and stop ++ ++TEST $CLI_3 volume tier $V0 detach start ++ ++TEST $CLI_3 volume tier $V0 detach stop ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" tier_status ++ ++TEST $CLI_1 volume tier $V0 start force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check ++ ++# To test for detach start fail while the brick is down ++ ++TEST pkill -f "$B1/$V0" ++ ++TEST ! $CLI_1 volume tier $V0 detach start ++ ++cleanup ++# This test isn't worth keeping. Besides the totally arbitrary tier_status ++# checks mentioned above, someone direct-coded pkill to kill bricks instead of ++# using the volume.rc function we already had. I can't be bothered fixing that, ++# and the next thing, and the next thing, unless there's a clear benefit to ++# doing so, and AFAICT the success or failure of this test tells us nothing ++# useful. Therefore, it's disabled until further notice. 
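++
++# (Sketch, not part of the original test: if this check is ever re-enabled,
++# deriving the count from the CLI output, i.e. the line commented out above,
++# is the obvious starting point:
++#     tier_status () { $CLI_1 volume tier $V0 status | grep progress | wc -l; }
++# )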
++#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/basic/tier/unlink-during-migration.t b/tests/basic/tier/unlink-during-migration.t +new file mode 100755 +index 0000000..1330092 +--- /dev/null ++++ b/tests/basic/tier/unlink-during-migration.t +@@ -0,0 +1,92 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++DEMOTE_FREQ=5 ++PROMOTE_FREQ=5 ++ ++function create_dist_rep_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 replica 2 $H0:$B0/cold/${V0}{0..3} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume set $V0 features.ctr-enabled on ++ TEST $CLI volume start $V0 ++} ++ ++function attach_dist_rep_tier () { ++ TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/hot/${V0}{0..3} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 0 ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a volume ++create_dist_rep_vol ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++# Create a large file (320MB), so that rebalance takes time ++TEST dd if=/dev/zero of=$M0/foo bs=64k count=5120 ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name foo` ++echo "File path on cold tier: "$CPATH ++ ++#Now attach the tier ++attach_dist_rep_tier ++ ++#Write into the file to promote it ++echo "good morning">>$M0/foo ++ ++# Wait for the tier process to promote the file ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $CPATH ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name foo` ++ ++echo "File path on hot tier: "$HPATH ++TEST rm -rf $M0/foo ++TEST ! stat $HPATH ++TEST ! stat $CPATH ++ ++#unlink during demotion ++HPATH=""; ++CPATH=""; ++ ++# Create a large file (320MB), so that rebalance takes time ++TEST dd if=/dev/zero of=$M0/foo1 bs=64k count=5120 ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name foo1` ++echo "File path on hot tier : "$HPATH ++ ++EXPECT_WITHIN $REBALANCE_TIMEOUT "yes" is_sticky_set $HPATH ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name foo1` ++echo "File path on cold tier : "$CPATH ++ ++TEST rm -rf $M0/foo1 ++ ++TEST ! stat $HPATH ++TEST ! stat $CPATH ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t b/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t +new file mode 100644 +index 0000000..3b62a45 +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1303028-Rebalance-glusterd-rpc-connection-issue.t +@@ -0,0 +1,78 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++ ++# Creates a tiered volume with pure distribute hot and cold tiers ++# Both hot and cold tiers will have an equal number of bricks. 
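++
++# (Context, assumed from volume.rc rather than shown in this patch:
++# rebalance_run_time extracts the elapsed run-time field from
++#     $CLI volume rebalance $V0 status
++# which is why the checks below expect a non-zero, growing value once the
++# tier daemon is up.)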
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{1..3} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{1..2} ++ TEST $CLI volume set $V0 cluster.tier-mode test ++} ++ ++function non_zero_check () { ++ if [ "$1" -ne 0 ] ++ then ++ echo "0" ++ else ++ echo "1" ++ fi ++} ++ ++function num_bricks_up { ++ local b ++ local n_up=0 ++ ++ for b in $B0/hot/${V0}{1..2} $B0/cold/${V0}{1..3}; do ++ if [ x"$(brick_up_status $V0 $H0 $b)" = x"1" ]; then ++ n_up=$((n_up+1)) ++ fi ++ done ++ ++ echo $n_up ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume status ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol ++# Wait for the bricks to come up, *then* the tier daemon. ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 5 num_bricks_up ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 tier_daemon_check ++sleep 5 #wait for some time to run tier daemon ++time_before_restarting=$(rebalance_run_time $V0); ++ ++#checking for elapsed time after sleeping for two seconds. ++EXPECT "0" non_zero_check $time_before_restarting; ++ ++#Difference of elapsed time should be positive ++ ++kill -9 $(pidof glusterd); ++TEST glusterd; ++sleep 2; ++# Wait for the bricks to come up, *then* the tier daemon. ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 5 num_bricks_up ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" tier_daemon_check; ++sleep 1; ++time1=$(rebalance_run_time $V0); ++EXPECT "0" non_zero_check $time1; ++sleep 2; ++time2=$(rebalance_run_time $V0); ++EXPECT "0" non_zero_check $time2; ++diff=`expr $time2 - $time1` ++EXPECT "0" non_zero_check $diff; +diff --git a/tests/bugs/quota/bug-1288474.t b/tests/bugs/quota/bug-1288474.t +new file mode 100755 +index 0000000..b8f4ba3 +--- /dev/null ++++ b/tests/bugs/quota/bug-1288474.t +@@ -0,0 +1,51 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../tier.rc ++ ++NUM_BRICKS=2 ++ ++function create_dist_tier_vol () { ++ mkdir -p $B0/cold/${V0}{0..$1} ++ mkdir -p $B0/hot/${V0}{0..$1} ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 nfs.disable false ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++} ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++touch $M0/foobar ++ ++TEST $CLI volume quota $V0 enable ++TEST $CLI volume quota $V0 limit-usage / 10MB ++ ++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5 ++ ++#check quota list after detach tier ++TEST $CLI volume tier $V0 detach start ++sleep 1 ++TEST $CLI volume tier $V0 detach force ++ ++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5 ++ ++#check quota list after attach tier ++rm -rf $B0/hot ++mkdir $B0/hot ++TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ ++EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quota_list_field "/" 5 ++ ++TEST umount $M0 ++ ++cleanup; ++ +diff --git a/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t b/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t +new file mode 100644 +index 0000000..9863834 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1290965-detect-bitrotten-objects.t +@@ -0,0 +1,53 @@ ++#!/bin/bash ++#Self-heal tests ++ ++. 
$(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume set $V0 entry-self-heal off ++TEST $CLI volume set $V0 metadata-self-heal off ++TEST $CLI volume set $V0 data-self-heal off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume start $V0 ++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/brick{2,3} ++TEST $CLI volume bitrot $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++TEST $CLI volume bitrot $V0 scrub-frequency hourly ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1 ++ ++#Corrupt file from back-end ++TEST stat $B0/brick3/FILE ++echo "Corrupted data" >> $B0/brick3/FILE ++#Manually set bad-file xattr since we can't wait for an hour. ++TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/brick3/FILE ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count ++#Trigger lookup so that bitrot xlator marks file as bad in its inode context. ++stat $M0/FILE ++# Remove hot-tier ++TEST $CLI volume tier $V0 detach start ++sleep 1 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" detach_tier_status_field_complete $V0 ++TEST $CLI volume tier $V0 detach commit ++#Test that file has migrated to cold tier. ++EXPECT "1024" stat -c "%s" $B0/brick0/FILE ++EXPECT "1024" stat -c "%s" $B0/brick1/FILE ++TEST umount $M0 ++cleanup +diff --git a/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t b/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t +new file mode 100644 +index 0000000..b2d382a +--- /dev/null ++++ b/tests/bugs/tier/bug-1205545-CTR-and-trash-integration.t +@@ -0,0 +1,72 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc
++
++LAST_BRICK=3
++CACHE_BRICK_FIRST=4
++CACHE_BRICK_LAST=5
++
++cleanup
++
++# Start glusterd [1-2]
++TEST glusterd
++TEST pidof glusterd
++
++# Set-up tier cluster [3-4]
++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
++TEST $CLI volume start $V0
++TEST $CLI volume tier $V0 attach replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
++
++# Start and mount the volume after enabling CTR and trash [5-8]
++TEST $CLI volume set $V0 features.ctr-enabled on
++TEST $CLI volume set $V0 features.trash on
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
++
++# Create an empty file
++touch $M0/foo
++
++# gf_file_tb and gf_flink_tb should contain one entry each [9]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++            sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Create two hard links
++ln $M0/foo $M0/lnk1
++ln $M0/foo $M0/lnk2
++
++# Now gf_flink_tb should contain 3 entries [10]
++ENTRY_COUNT=$(echo "select * from gf_flink_tb;" | \
++            sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 3 ]
++
++# Delete the hard link
++rm -rf $M0/lnk1
++
++# Corresponding hard link entry must be removed from gf_flink_tb
++# but gf_file_tb should still contain the file entry [11]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++            sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 3 ]
++
++# Remove the file
++rm -rf $M0/foo
++
++# Another hardlink removed [12]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++            sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 2 ]
++
++# Remove the last hardlink
++rm -rf $M0/lnk2
++
++# All entries must be removed from gf_flink_tb and gf_file_tb [13]
++ENTRY_COUNT=$(echo "select * from gf_file_tb; select * from gf_flink_tb;" | \
++            sqlite3 $B0/${V0}5/.glusterfs/${V0}5.db | wc -l )
++TEST [ $ENTRY_COUNT -eq 0 ]
++
++cleanup
++
++
++
++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+diff --git a/tests/bugs/tier/bug-1279376-rename-demoted-file.t b/tests/bugs/tier/bug-1279376-rename-demoted-file.t
+new file mode 100755
+index 0000000..c4a50d9
+--- /dev/null
++++ b/tests/bugs/tier/bug-1279376-rename-demoted-file.t
+@@ -0,0 +1,93 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../tier.rc
++
++
++NUM_BRICKS=2
++DEMOTE_FREQ=15
++DEMOTE_TIMEOUT=10
++PROMOTE_FREQ=500
++
++
++#Both src and dst files must hash to the same hot tier subvol
++SRC_FILE="file1.txt"
++DST_FILE="newfile1.txt"
++
++
++# Creates a tiered volume with pure distribute hot and cold tiers
++# Both hot and cold tiers will have an equal number of bricks.
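++
++# (Context, not part of the original patch: exists_and_regular_file comes
++# from tier.rc and is assumed to report "yes" only for a real file, not a
++# sticky-bit linkfile left behind by migration, roughly:
++#     exists_and_regular_file () { [ -f "$1" ] && [ ! -k "$1" ] && echo "yes" || echo "no"; }
++# )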
++ ++function create_dist_tier_vol () { ++ mkdir $B0/cold ++ mkdir $B0/hot ++ TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} ++ TEST $CLI volume set $V0 performance.quick-read off ++ TEST $CLI volume set $V0 performance.io-cache off ++ TEST $CLI volume start $V0 ++ TEST $CLI volume tier $V0 attach $H0:$B0/hot/${V0}{0..$1} ++ TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ ++ TEST $CLI volume set $V0 cluster.tier-mode test ++ ++#We do not want any files to be promoted during this test ++ TEST $CLI volume set $V0 features.record-counters on ++ TEST $CLI volume set $V0 cluster.read-freq-threshold 50 ++ TEST $CLI volume set $V0 cluster.write-freq-threshold 50 ++} ++ ++ ++cleanup; ++ ++#Basic checks ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++ ++#Create and start a tiered volume ++create_dist_tier_vol $NUM_BRICKS ++ ++# Mount FUSE ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++ ++ ++# The file will be created on the hot tier ++ ++TEST touch "$M0/$SRC_FILE" ++ ++# Get the path of the file on the hot tier ++HPATH=`find $B0/hot/ -name "$SRC_FILE"` ++echo "File path on hot tier: "$HPATH ++ ++ ++EXPECT "yes" exists_and_regular_file $HPATH ++ ++# Wait for the tier process to demote the file ++sleep $DEMOTE_FREQ ++ ++# Get the path of the file on the cold tier ++CPATH=`find $B0/cold/ -name "$SRC_FILE"` ++echo "File path on cold tier: "$CPATH ++ ++EXPECT_WITHIN $DEMOTE_TIMEOUT "yes" exists_and_regular_file $CPATH ++ ++#We don't want $DST_FILE to get demoted ++TEST $CLI volume set $V0 cluster.tier-demote-frequency $PROMOTE_FREQ ++ ++#This will be created on the hot tier ++ ++touch "$M0/$DST_FILE" ++HPATH=`find $B0/hot/ -name "$DST_FILE"` ++echo "File path on hot tier: "$HPATH ++ ++TEST mv $M0/$SRC_FILE $M0/$DST_FILE ++ ++# We expect a single file to exist at this point ++# when viewed on the mountpoint ++EXPECT 1 echo $(ls -l $M0 | grep $DST_FILE | wc -l) ++ ++cleanup; ++ ++#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000 +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index b7c7bd9..ed24858 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -1859,6 +1859,78 @@ out: + return ret; + } + ++#if USE_GFDB /* only add changetimerecorder when GFDB is enabled */ ++static int ++brick_graph_add_changetimerecorder(volgen_graph_t *graph, ++ glusterd_volinfo_t *volinfo, ++ dict_t *set_dict, ++ glusterd_brickinfo_t *brickinfo) ++{ ++ xlator_t *xl = NULL; ++ int ret = -1; ++ char *brickname = NULL; ++ char *path = NULL; ++ char index_basepath[PATH_MAX] = {0}; ++ char *hotbrick = NULL; ++ ++ if (!graph || !volinfo || !set_dict || !brickinfo) ++ goto out; ++ ++ path = brickinfo->path; ++ ++ xl = volgen_graph_add(graph, "features/changetimerecorder", ++ volinfo->volname); ++ if (!xl) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "db-type", "sqlite3"); ++ if (ret) ++ goto out; ++ ++ if (!set_dict || dict_get_str(set_dict, "hot-brick", &hotbrick)) ++ hotbrick = "off"; ++ ++ ret = xlator_set_fixed_option(xl, "hot-brick", hotbrick); ++ if (ret) ++ goto out; ++ ++ brickname = strrchr(path, '/') + 1; ++ snprintf(index_basepath, sizeof(index_basepath), "%s.db", brickname); ++ ret = xlator_set_fixed_option(xl, "db-name", index_basepath); ++ if (ret) ++ goto out; ++ ++ snprintf(index_basepath, sizeof(index_basepath), "%s/%s", path, ++ ".glusterfs/"); ++ ret = 
xlator_set_fixed_option(xl, "db-path", index_basepath); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "record-exit", "off"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "ctr_link_consistency", "off"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "ctr_lookupheal_link_timeout", "300"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "ctr_lookupheal_inode_timeout", "300"); ++ if (ret) ++ goto out; ++ ++ ret = xlator_set_fixed_option(xl, "record-entry", "on"); ++ if (ret) ++ goto out; ++ ++out: ++ return ret; ++} ++#endif /* USE_GFDB */ ++ + static int + brick_graph_add_acl(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +@@ -2615,6 +2687,9 @@ static volgen_brick_xlator_t server_graph_table[] = { + {brick_graph_add_acl, "acl"}, + {brick_graph_add_bitrot_stub, "bitrot-stub"}, + {brick_graph_add_changelog, "changelog"}, ++#if USE_GFDB /* changetimerecorder depends on gfdb */ ++ {brick_graph_add_changetimerecorder, "changetimerecorder"}, ++#endif + {brick_graph_add_bd, "bd"}, + {brick_graph_add_trash, "trash"}, + {brick_graph_add_arbiter, "arbiter"}, +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index c8f6e67..a877805 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -11,6 +11,474 @@ cases as published by the Free Software Foundation. + #include "glusterd-volgen.h" + #include "glusterd-utils.h" + ++#if USE_GFDB /* no GFDB means tiering is disabled */ ++ ++static int ++get_tier_freq_threshold(glusterd_volinfo_t *volinfo, char *threshold_key) ++{ ++ int threshold = 0; ++ char *str_thresold = NULL; ++ int ret = -1; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ glusterd_volinfo_get(volinfo, threshold_key, &str_thresold); ++ if (str_thresold) { ++ ret = gf_string2int(str_thresold, &threshold); ++ if (ret == -1) { ++ threshold = ret; ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "Failed to convert " ++ "string to integer"); ++ } ++ } ++ ++ return threshold; ++} ++ ++/* ++ * Validation function for record-counters ++ * if write-freq-threshold and read-freq-threshold both have non-zero values ++ * record-counters cannot be set to off ++ * if record-counters is set to on ++ * check if both the frequency thresholds are zero, then pop ++ * a note, but volume set is not failed. ++ * */ ++static int ++validate_tier_counters(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ++ char *value, char **op_errstr) ++{ ++ char errstr[2048] = ""; ++ int ret = -1; ++ xlator_t *this = NULL; ++ gf_boolean_t origin_val = -1; ++ int current_wt = 0; ++ int current_rt = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(errstr, sizeof(errstr), ++ "Volume %s is not a tier " ++ "volume. Option %s is only valid for tier volume.", ++ volinfo->volname, key); ++ goto out; ++ } ++ ++ ret = gf_string2boolean(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. 
%s expects a boolean value",
++                 value, key);
++        goto out;
++    }
++
++    current_rt = get_tier_freq_threshold(volinfo,
++                                         "cluster.read-freq-threshold");
++    if (current_rt == -1) {
++        snprintf(errstr, sizeof(errstr),
++                 " Failed to retrieve value"
++                 " of cluster.read-freq-threshold");
++        goto out;
++    }
++    current_wt = get_tier_freq_threshold(volinfo,
++                                         "cluster.write-freq-threshold");
++    if (current_wt == -1) {
++        snprintf(errstr, sizeof(errstr),
++                 " Failed to retrieve value "
++                 "of cluster.write-freq-threshold");
++        goto out;
++    }
++    /* If record-counters is set to off */
++    if (!origin_val) {
++        /* Both the thresholds should be zero to set
++         * record-counters to off*/
++        if (current_rt || current_wt) {
++            snprintf(errstr, sizeof(errstr),
++                     "Cannot set features.record-counters to \"%s\""
++                     " as cluster.write-freq-threshold is %d"
++                     " and cluster.read-freq-threshold is %d. Please"
++                     " set both cluster.write-freq-threshold and "
++                     " cluster.read-freq-threshold to 0, to set "
++                     " features.record-counters to \"%s\".",
++                     value, current_wt, current_rt, value);
++            ret = -1;
++            goto out;
++        }
++    }
++    /* TODO give a warning message to the user. errstr without ret = -1 will
++     * not result in a warning on cli for now.
++    else {
++        if (!current_rt && !current_wt) {
++            snprintf (errstr, sizeof (errstr),
++                      " Note : cluster.write-freq-threshold is %d"
++                      " and cluster.read-freq-threshold is %d. Please"
++                      " set both cluster.write-freq-threshold and "
++                      " cluster.read-freq-threshold to"
++                      " appropriate positive values.",
++                      current_wt, current_rt);
++        }
++    }*/
++
++    ret = 0;
++out:
++
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE,
++               "%s", errstr);
++        *op_errstr = gf_strdup(errstr);
++    }
++
++    return ret;
++}
++
++/*
++ * Validation function for ctr sql params
++ * features.ctr-sql-db-cachesize (Range: 1000 to 262144 pages)
++ * features.ctr-sql-db-wal-autocheckpoint (Range: 1000 to 262144 pages)
++ * */
++static int
++validate_ctr_sql_params(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
++                        char *value, char **op_errstr)
++{
++    int ret = -1;
++    xlator_t *this = NULL;
++    char errstr[2048] = "";
++    int origin_val = -1;
++
++    this = THIS;
++    GF_ASSERT(this);
++
++    ret = gf_string2int(value, &origin_val);
++    if (ret) {
++        snprintf(errstr, sizeof(errstr),
++                 "%s is not a compatible "
++                 "value. %s expects an integer value.",
++                 value, key);
++        ret = -1;
++        goto out;
++    }
++
++    if (origin_val < 0) {
++        snprintf(errstr, sizeof(errstr),
++                 "%s is not a "
++                 "compatible value. %s expects a positive "
++                 "integer value.",
++                 value, key);
++        ret = -1;
++        goto out;
++    }
++
++    if (strstr(key, "sql-db-cachesize") ||
++        strstr(key, "sql-db-wal-autocheckpoint")) {
++        if ((origin_val < 1000) || (origin_val > 262144)) {
++            snprintf(errstr, sizeof(errstr),
++                     "%s is not a "
++                     "compatible value. 
%s " ++ "expects a value between : " ++ "1000 to 262144.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ ret = 0; ++out: ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ } ++ return ret; ++} ++ ++/* Validation for tiering frequency thresholds ++ * If any of the frequency thresholds are set to a non-zero value, ++ * switch record-counters on, if not already on ++ * If both the frequency thresholds are set to zero, ++ * switch record-counters off, if not already off ++ * */ ++static int ++validate_tier_thresholds(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ++ char *value, char **op_errstr) ++{ ++ char errstr[2048] = ""; ++ int ret = -1; ++ xlator_t *this = NULL; ++ int origin_val = -1; ++ gf_boolean_t current_rc = _gf_false; ++ int current_wt = 0; ++ int current_rt = 0; ++ gf_boolean_t is_set_rc = _gf_false; ++ char *proposed_rc = NULL; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(errstr, sizeof(errstr), ++ "Volume %s is not a tier " ++ "volume. Option %s is only valid for tier volume.", ++ volinfo->volname, key); ++ goto out; ++ } ++ ++ ret = gf_string2int(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. %s expects an integer value.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ ++ if (origin_val < 0) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ "compatible value. %s expects a positive" ++ "integer value.", ++ value, key); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Get the record-counters value */ ++ ret = glusterd_volinfo_get_boolean(volinfo, "features.record-counters"); ++ if (ret == -1) { ++ snprintf(errstr, sizeof(errstr), ++ "Failed to retrieve value of" ++ "features.record-counters from volume info"); ++ goto out; ++ } ++ current_rc = ret; ++ ++ /* if any of the thresholds are set to a non-zero value ++ * switch record-counters on, if not already on*/ ++ if (origin_val > 0) { ++ if (!current_rc) { ++ is_set_rc = _gf_true; ++ current_rc = _gf_true; ++ } ++ } else { ++ /* if the set is for write-freq-threshold */ ++ if (strstr(key, "write-freq-threshold")) { ++ current_rt = get_tier_freq_threshold(volinfo, ++ "cluster.read-freq-threshold"); ++ if (current_rt == -1) { ++ snprintf(errstr, sizeof(errstr), ++ " Failed to retrieve value of" ++ "cluster.read-freq-threshold"); ++ goto out; ++ } ++ current_wt = origin_val; ++ } ++ /* else it should be read-freq-threshold */ ++ else { ++ current_wt = get_tier_freq_threshold( ++ volinfo, "cluster.write-freq-threshold"); ++ if (current_wt == -1) { ++ snprintf(errstr, sizeof(errstr), ++ " Failed to retrieve value of" ++ "cluster.write-freq-threshold"); ++ goto out; ++ } ++ current_rt = origin_val; ++ } ++ ++ /* Since both the thresholds are zero, set record-counters ++ * to off, if not already off */ ++ if (current_rt == 0 && current_wt == 0) { ++ if (current_rc) { ++ is_set_rc = _gf_true; ++ current_rc = _gf_false; ++ } ++ } ++ } ++ ++ /* if record-counter has to be set to proposed value */ ++ if (is_set_rc) { ++ if (current_rc) { ++ ret = gf_asprintf(&proposed_rc, "on"); ++ } else { ++ ret = gf_asprintf(&proposed_rc, "off"); ++ } ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "Failed to allocate memory to dict_value"); ++ goto error; ++ } ++ ret = dict_set_str(volinfo->dict, "features.record-counters", ++ proposed_rc); ++ error: ++ if (ret) { ++ 
snprintf(errstr, sizeof(errstr), ++ "Failed to set features.record-counters" ++ "to \"%s\" automatically." ++ "Please try to set features.record-counters " ++ "\"%s\" manually. The options " ++ "cluster.write-freq-threshold and " ++ "cluster.read-freq-threshold can only " ++ "be set to a non zero value, if " ++ "features.record-counters is " ++ "set to \"on\".", ++ proposed_rc, proposed_rc); ++ goto out; ++ } ++ } ++ ret = 0; ++out: ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ if (proposed_rc) ++ GF_FREE(proposed_rc); ++ } ++ return ret; ++} ++ ++static int ++validate_tier(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, ++ char **op_errstr) ++{ ++ char errstr[2048] = ""; ++ int ret = 0; ++ xlator_t *this = NULL; ++ int origin_val = -1; ++ char *current_wm_hi = NULL; ++ char *current_wm_low = NULL; ++ uint64_t wm_hi = 0; ++ uint64_t wm_low = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ++ snprintf(errstr, sizeof(errstr), ++ "Volume %s is not a tier " ++ "volume. Option %s is only valid for tier volume.", ++ volinfo->volname, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "cluster.tier-mode")) { ++ if (strcmp(value, "test") && strcmp(value, "cache")) { ++ ret = -1; ++ goto out; ++ } ++ goto out; ++ } else if (strstr(key, "tier-pause")) { ++ if (strcmp(value, "off") && strcmp(value, "on")) { ++ ret = -1; ++ goto out; ++ } ++ goto out; ++ } else if (strstr(key, "tier-compact")) { ++ if (strcmp(value, "on") && strcmp(value, "off")) { ++ ret = -1; ++ goto out; ++ } ++ ++ goto out; ++ } ++ ++ /* ++ * Rest of the volume set options for tier are expecting a positive ++ * Integer. Change the function accordingly if this constraint is ++ * changed. ++ */ ++ ret = gf_string2int(value, &origin_val); ++ if (ret) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a compatible " ++ "value. %s expects an integer value.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "watermark-hi") || strstr(key, "watermark-low")) { ++ if ((origin_val < 1) || (origin_val > 99)) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ "compatible value. 
%s expects a " ++ "percentage from 1-99.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ ++ if (strstr(key, "watermark-hi")) { ++ wm_hi = origin_val; ++ } else { ++ glusterd_volinfo_get(volinfo, "cluster.watermark-hi", ++ ¤t_wm_hi); ++ gf_string2bytesize_uint64(current_wm_hi, &wm_hi); ++ } ++ ++ if (strstr(key, "watermark-low")) { ++ wm_low = origin_val; ++ } else { ++ glusterd_volinfo_get(volinfo, "cluster.watermark-low", ++ ¤t_wm_low); ++ gf_string2bytesize_uint64(current_wm_low, &wm_low); ++ } ++ if (wm_low >= wm_hi) { ++ snprintf(errstr, sizeof(errstr), ++ "lower watermark" ++ " cannot be equal or exceed upper " ++ "watermark."); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ } else if (strstr(key, "tier-promote-frequency") || ++ strstr(key, "tier-max-mb") || ++ strstr(key, "tier-max-promote-file-size") || ++ strstr(key, "tier-max-files") || ++ strstr(key, "tier-demote-frequency") || ++ strstr(key, "tier-hot-compact-frequency") || ++ strstr(key, "tier-cold-compact-frequency") || ++ strstr(key, "tier-query-limit")) { ++ if (origin_val < 1) { ++ snprintf(errstr, sizeof(errstr), ++ "%s is not a " ++ " compatible value. %s expects a positive " ++ "integer value greater than 0.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, ++ "%s", errstr); ++ *op_errstr = gf_strdup(errstr); ++ ret = -1; ++ goto out; ++ } ++ } ++out: ++ gf_msg_debug(this->name, 0, "Returning %d", ret); ++ ++ return ret; ++} ++ ++#endif /* End for USE_GFDB */ ++ + static int + validate_cache_max_min_size(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +@@ -2485,6 +2953,261 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "/var/run/gluster/shared_storage on enabling this " + "option. Unmount and delete the shared storage volume " + " on disabling this option."}, ++#if USE_GFDB /* no GFDB means tiering is disabled */ ++ /* tier translator - global tunables */ ++ {.key = "cluster.write-freq-threshold", ++ .voltype = "cluster/tier", ++ .value = "0", ++ .option = "write-freq-threshold", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier_thresholds, ++ .description = "Defines the number of writes, in a promotion/demotion" ++ " cycle, that would mark a file HOT for promotion. Any" ++ " file that has write hits less than this value will " ++ "be considered as COLD and will be demoted."}, ++ {.key = "cluster.read-freq-threshold", ++ .voltype = "cluster/tier", ++ .value = "0", ++ .option = "read-freq-threshold", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier_thresholds, ++ .description = "Defines the number of reads, in a promotion/demotion " ++ "cycle, that would mark a file HOT for promotion. 
Any " ++ "file that has read hits less than this value will be " ++ "considered as COLD and will be demoted."}, ++ { ++ .key = "cluster.tier-pause", ++ .voltype = "cluster/tier", ++ .option = "tier-pause", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ { ++ .key = "cluster.tier-promote-frequency", ++ .voltype = "cluster/tier", ++ .value = "120", ++ .option = "tier-promote-frequency", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ { ++ .key = "cluster.tier-demote-frequency", ++ .voltype = "cluster/tier", ++ .value = "3600", ++ .option = "tier-demote-frequency", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ {.key = "cluster.watermark-hi", ++ .voltype = "cluster/tier", ++ .value = "90", ++ .option = "watermark-hi", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "Upper % watermark for promotion. If hot tier fills" ++ " above this percentage, no promotion will happen and demotion will " ++ "happen with high probability."}, ++ {.key = "cluster.watermark-low", ++ .voltype = "cluster/tier", ++ .value = "75", ++ .option = "watermark-low", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "Lower % watermark. If hot tier is less " ++ "full than this, promotion will happen and demotion will not happen. " ++ "If greater than this, promotion/demotion will happen at a " ++ "probability " ++ "relative to how full the hot tier is."}, ++ {.key = "cluster.tier-mode", ++ .voltype = "cluster/tier", ++ .option = "tier-mode", ++ .value = "cache", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "Either 'test' or 'cache'. Test mode periodically" ++ " demotes or promotes files automatically based on access." ++ " Cache mode does so based on whether the cache is full or not," ++ " as specified with watermarks."}, ++ {.key = "cluster.tier-max-promote-file-size", ++ .voltype = "cluster/tier", ++ .option = "tier-max-promote-file-size", ++ .value = "0", ++ .op_version = GD_OP_VERSION_3_7_10, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = ++ "The maximum file size in bytes that is promoted. 
If 0, there" ++ " is no maximum size (default)."}, ++ {.key = "cluster.tier-max-mb", ++ .voltype = "cluster/tier", ++ .option = "tier-max-mb", ++ .value = "4000", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = "The maximum number of MB that may be migrated" ++ " in any direction in a given cycle by a single node."}, ++ {.key = "cluster.tier-max-files", ++ .voltype = "cluster/tier", ++ .option = "tier-max-files", ++ .value = "10000", ++ .op_version = GD_OP_VERSION_3_7_6, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = "The maximum number of files that may be migrated" ++ " in any direction in a given cycle by a single node."}, ++ {.key = "cluster.tier-query-limit", ++ .voltype = "cluster/tier", ++ .option = "tier-query-limit", ++ .value = "100", ++ .op_version = GD_OP_VERSION_3_9_1, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .type = NO_DOC, ++ .description = "The maximum number of files that may be migrated " ++ "during an emergency demote. An emergency condition " ++ "is flagged when writes breach the hi-watermark."}, ++ {.key = "cluster.tier-compact", ++ .voltype = "cluster/tier", ++ .option = "tier-compact", ++ .value = "on", ++ .op_version = GD_OP_VERSION_3_9_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ .description = "Activate or deactivate the compaction of the DB" ++ " for the volume's metadata."}, ++ { ++ .key = "cluster.tier-hot-compact-frequency", ++ .voltype = "cluster/tier", ++ .value = "604800", ++ .option = "tier-hot-compact-frequency", ++ .op_version = GD_OP_VERSION_3_9_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ { ++ .key = "cluster.tier-cold-compact-frequency", ++ .voltype = "cluster/tier", ++ .value = "604800", ++ .option = "tier-cold-compact-frequency", ++ .op_version = GD_OP_VERSION_3_9_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT, ++ .validate_fn = validate_tier, ++ }, ++ {.key = "features.ctr-enabled", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "ctr-enabled", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .description = "Enable CTR xlator"}, ++ {.key = "features.record-counters", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "record-counters", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .validate_fn = validate_tier_counters, ++ .description = "Its a Change Time Recorder Xlator option to " ++ "enable recording write " ++ "and read heat counters. The default is disabled. " ++ "If enabled, \"cluster.write-freq-threshold\" and " ++ "\"cluster.read-freq-threshold\" defined the number " ++ "of writes (or reads) to a given file are needed " ++ "before triggering migration."}, ++ {.key = "features.ctr-record-metadata-heat", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "ctr-record-metadata-heat", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .type = NO_DOC, ++ .description = "Its a Change Time Recorder Xlator option to " ++ "enable recording write heat on metadata of the file. " ++ "The default is disabled. 
" ++ "Metadata is inode attributes like atime, mtime," ++ " permissions etc and " ++ "extended attributes of a file ."}, ++ {.key = "features.ctr_link_consistency", ++ .voltype = "features/changetimerecorder", ++ .value = "off", ++ .option = "ctr_link_consistency", ++ .op_version = GD_OP_VERSION_3_7_0, ++ .type = NO_DOC, ++ .description = "Enable a crash consistent way of recording hardlink " ++ "updates by Change Time Recorder Xlator. " ++ "When recording in a crash " ++ "consistent way the data operations will " ++ "experience more latency."}, ++ {.key = "features.ctr_lookupheal_link_timeout", ++ .voltype = "features/changetimerecorder", ++ .value = "300", ++ .option = "ctr_lookupheal_link_timeout", ++ .op_version = GD_OP_VERSION_3_7_2, ++ .type = NO_DOC, ++ .description = "Defines the expiry period of in-memory " ++ "hardlink of an inode," ++ "used by lookup heal in Change Time Recorder." ++ "Once the expiry period" ++ "hits an attempt to heal the database per " ++ "hardlink is done and the " ++ "in-memory hardlink period is reset"}, ++ {.key = "features.ctr_lookupheal_inode_timeout", ++ .voltype = "features/changetimerecorder", ++ .value = "300", ++ .option = "ctr_lookupheal_inode_timeout", ++ .op_version = GD_OP_VERSION_3_7_2, ++ .type = NO_DOC, ++ .description = "Defines the expiry period of in-memory inode," ++ "used by lookup heal in Change Time Recorder. " ++ "Once the expiry period" ++ "hits an attempt to heal the database per " ++ "inode is done"}, ++ {.key = "features.ctr-sql-db-cachesize", ++ .voltype = "features/changetimerecorder", ++ .value = "12500", ++ .option = "sql-db-cachesize", ++ .validate_fn = validate_ctr_sql_params, ++ .op_version = GD_OP_VERSION_3_7_7, ++ .description = "Defines the cache size of the sqlite database of " ++ "changetimerecorder xlator." ++ "The input to this option is in pages." ++ "Each page is 4096 bytes. Default value is 12500 " ++ "pages." ++ "The max value is 262144 pages i.e 1 GB and " ++ "the min value is 1000 pages i.e ~ 4 MB. "}, ++ {.key = "features.ctr-sql-db-wal-autocheckpoint", ++ .voltype = "features/changetimerecorder", ++ .value = "25000", ++ .option = "sql-db-wal-autocheckpoint", ++ .validate_fn = validate_ctr_sql_params, ++ .op_version = GD_OP_VERSION_3_7_7, ++ .description = "Defines the autocheckpoint of the sqlite database of " ++ " changetimerecorder. " ++ "The input to this option is in pages. " ++ "Each page is 4096 bytes. Default value is 25000 " ++ "pages." ++ "The max value is 262144 pages i.e 1 GB and " ++ "the min value is 1000 pages i.e ~4 MB."}, ++ {.key = VKEY_FEATURES_SELINUX, ++ .voltype = "features/selinux", ++ .type = NO_DOC, ++ .value = "on", ++ .op_version = GD_OP_VERSION_3_11_0, ++ .description = "Convert security.selinux xattrs to " ++ "trusted.gluster.selinux on the bricks. 
Recommended " ++ "to have enabled when clients and/or bricks support " ++ "SELinux."}, ++ ++#endif /* USE_GFDB */ + { + .key = "locks.trace", + .voltype = "features/locks", +-- +1.8.3.1 + diff --git a/SOURCES/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch b/SOURCES/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch new file mode 100644 index 0000000..1f4a767 --- /dev/null +++ b/SOURCES/0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch @@ -0,0 +1,89 @@ +From 144f2eb56d1bbecc9c455065755f41ec81974e3e Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Sun, 7 Apr 2019 21:54:07 +0530 +Subject: [PATCH 087/124] ganesha : fixing minor issues after the backport from + 3.4 + +label : DOWNSTREAM ONLY + +Change-Id: Ib0f6d8728d2e33da63ed4baab0bb981a0b06a8e0 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167168 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 20 +++++++++++++++++--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + 2 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index e0607ba..f6b823d 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -883,6 +883,15 @@ sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sha + install -D -p -m 0644 extras/glusterfs-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs + ++# ganesha ghosts ++%if ( 0%{!?_without_server:1} ) ++mkdir -p %{buildroot}%{_sysconfdir}/ganesha ++touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf ++mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ ++touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf ++touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf ++%endif ++ + %if ( 0%{!?_without_georeplication:1} ) + mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication + touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf +@@ -1197,7 +1206,7 @@ exit 0 + + %if ( 0%{?_without_server:1} ) + #exclude ganesha related files +-%exclude %{_sysconfdir}/ganesha/* ++%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample + %exclude %{_libexecdir}/ganesha/* + %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif +@@ -1376,9 +1385,15 @@ exit 0 + + %if ( 0%{!?_without_server:1} ) + %files ganesha +-%{_sysconfdir}/ganesha/* ++%dir %{_libexecdir}/ganesha ++%{_sysconfdir}/ganesha/ganesha-ha.conf.sample + %{_libexecdir}/ganesha/* + %{_prefix}/lib/ocf/resource.d/heartbeat/* ++%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh ++%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf ++%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha ++%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf ++%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf + %endif + + %if ( 0%{!?_without_ocf:1} ) +@@ -1508,7 +1523,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh + %attr(0755,-,-) 
%{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh
+- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+ %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+index d882105..0a16925 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c
++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
+@@ -278,7 +278,7 @@ glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+         goto out;
+     }
+
+-    if (strcmp(value, "enable")) {
++    if (strcmp(value, "enable") == 0) {
+         ret = start_ganesha(op_errstr);
+         if (ret) {
+             gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+--
+1.8.3.1
+
diff --git a/SOURCES/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch b/SOURCES/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
new file mode 100644
index 0000000..db831a2
--- /dev/null
+++ b/SOURCES/0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch
@@ -0,0 +1,74 @@
+From bbcfd7e28b43845bac675dcc486bde09b0953f64 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Thu, 11 Apr 2019 14:40:11 +0530
+Subject: [PATCH 088/124] tier: fix failures noticed during tier start and tier
+ restart.
+
+Problem 1: when tier is started using the tier start command,
+the output was skipped during a failure. Failures don't have a
+transaction id; this id was checked, and if it was missing the
+output was skipped.
+
+Fix: remove the unnecessary jump for that case.
+
+Problem 2: when tier was restarted, tierd did not come online.
+This was because certain values that were supposed to be stored
+in glusterd (glusterd-store.c), to be used during restart to
+return to the original state, were never stored.
+As they were missing, tierd didn't come online.
+
+Fix: store the values and make them available during the start.
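+
+(Illustration, not part of the original message: the restart case can be
+exercised roughly as follows, assuming an existing tiered volume <vol>:
+
+    gluster volume tier <vol> status   # tierd online
+    pkill glusterd; glusterd           # restart the management daemon
+    gluster volume tier <vol> status   # without the fix, tierd stays offline
+)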
+
+Label: DOWNSTREAM ONLY
+
+Change-Id: I7df898fa4c3b72fe8ded4adbf573307a59a37e5e
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167653
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ cli/src/cli-rpc-ops.c                      |  1 -
+ xlators/mgmt/glusterd/src/glusterd-store.c | 13 +++++++++++++
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
+index 736cd18..b167e26 100644
+--- a/cli/src/cli-rpc-ops.c
++++ b/cli/src/cli-rpc-ops.c
+@@ -1973,7 +1973,6 @@ gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+     if (ret) {
+         gf_log("cli", GF_LOG_WARNING, "failed to get %s from dict",
+                GF_REBALANCE_TID_KEY);
+-        goto out;
+     }
+     if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+         snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 351bd9e..4889217 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -3336,6 +3336,19 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
+                 break;
+
+             case GF_CLUSTER_TYPE_TIER:
++                if (volinfo->tier_info.cold_type ==
++                    GF_CLUSTER_TYPE_DISPERSE)
++                    volinfo->tier_info.cold_dist_leaf_count
++                        = volinfo->disperse_count;
++                else
++                    volinfo->tier_info.cold_dist_leaf_count
++                        = glusterd_calc_dist_leaf_count (
++                              volinfo->tier_info.
++                              cold_replica_count,
++                              1);
++
++                break;
++
+             case GF_CLUSTER_TYPE_STRIPE:
+             case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+                 gf_msg(this->name, GF_LOG_CRITICAL, ENOTSUP,
+--
+1.8.3.1
+
diff --git a/SOURCES/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch b/SOURCES/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
new file mode 100644
index 0000000..6bdb7fc
--- /dev/null
+++ b/SOURCES/0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch
@@ -0,0 +1,85 @@
+From 8ba7e04362019ea2d0e80e67eb214d53dca58774 Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Thu, 17 Nov 2016 12:44:38 +0530
+Subject: [PATCH 089/124] glusterd/gNFS : On post upgrade to 3.2, disable gNFS
+ for all volumes
+
+Currently on 3.2 gNFS is disabled for newly created volumes or old volumes
+with the default value. There will be volumes which have explicitly turned
+off the nfs.disable option. This change disables gNFS even for those
+volumes as well.
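+
+(Illustration, not part of the original message: after the cluster op-version
+is bumped, a volume that previously had nfs.disable explicitly off is expected
+to come up with it set to on, e.g.
+
+    gluster volume set all cluster.op-version 30900
+    gluster volume get <vol> nfs.disable   # now reports: on
+)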
+ +label : DOWNSTREAM ONLY + +Change-Id: I4ddeb23690271034b0bbb3fc50b359350b5eae87 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/90425 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: Atin Mukherjee <amukherj@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167573 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 43 ++++++++++++++++++------------ + 1 file changed, 26 insertions(+), 17 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 10e2d48..06ea8cf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2501,26 +2501,35 @@ glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo) + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade +- * from anything below than 3.9.0 to 3.9.x the volume's dictionary will +- * not have 'nfs.disable' key set which means the same will not be set +- * to on until explicitly done. setnfs.disable to 'on' at op-version +- * bump up flow is the ideal way here. The same is also applicable for +- * transport.address-family where if the transport type is set to tcp +- * then transport.address-family is defaulted to 'inet'. ++ * from anything below than 3.9.0 to 3.9.x, the value for nfs.disable is ++ * set to 'on' for all volumes even if it is explicitly set to 'off' in ++ * previous version. This change is only applicable to downstream code. ++ * Setting nfs.disable to 'on' at op-version bump up flow is the ideal ++ * way here. The same is also applicable for transport.address-family ++ * where if the transport type is set to tcp then transport.address-family ++ * is defaulted to 'inet'. + */ + if (conf->op_version >= GD_OP_VERSION_3_9_0) { +- if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1)) { +- ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, +- "on"); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, +- "Failed to set " +- "option ' NFS_DISABLE_MAP_KEY ' on " +- "volume %s", +- volinfo->volname); +- goto out; +- } ++ if (!(dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 0))) { ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ "Gluster NFS is" ++ " being deprecated in favor of NFS-Ganesha, " ++ "hence setting nfs.disable to 'on' for volume " ++ "%s. 
Please re-enable it if required",
++               volinfo->volname);
++    }
++
++    ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
++                                     "on");
++    if (ret) {
++        gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
++               "Failed to set "
++               "option ' NFS_DISABLE_MAP_KEY ' on "
++               "volume %s",
++               volinfo->volname);
++        goto out;
+     }
++
+     ret = dict_get_strn(volinfo->dict, "transport.address-family",
+                         SLEN("transport.address-family"),
+                         &address_family_str);
+--
+1.8.3.1
+
diff --git a/SOURCES/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch b/SOURCES/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch
new file mode 100644
index 0000000..cbe1403
--- /dev/null
+++ b/SOURCES/0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch
@@ -0,0 +1,307 @@
+From 19210e4fc551893d1545e719fa26d9ad2d2f5cba Mon Sep 17 00:00:00 2001
+From: Jiffin Tony Thottan <jthottan@redhat.com>
+Date: Mon, 13 Nov 2017 18:41:58 +0530
+Subject: [PATCH 090/124] Revert "build: conditionally build legacy gNFS server
+ and associated sub-packaging"
+
+This reverts commit 83abcba6b42f94eb5a6495a634d4055362a9d79d.
+
+label : DOWNSTREAM ONLY
+
+Change-Id: If1c02d80b746e0a5b5e2c9a3625909158eff55d5
+Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167575
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-by: Kaleb Keithley <kkeithle@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ configure.ac                                 | 13 -------
+ extras/LinuxRPM/Makefile.am                  |  4 +--
+ glusterfs.spec.in                            | 54 ++++++----------------------
+ xlators/Makefile.am                          |  5 +--
+ xlators/mgmt/glusterd/src/glusterd-nfs-svc.c | 27 ++++++--------
+ 5 files changed, 24 insertions(+), 79 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 633e850..521671b 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1359,18 +1359,6 @@ if test -n "$LIBAIO"; then
+    BUILD_LIBAIO=yes
+ fi
+
+-dnl gnfs section
+-BUILD_GNFS="no"
+-AC_ARG_ENABLE([gnfs],
+-              AC_HELP_STRING([--enable-gnfs],
+-                             [Enable legacy gnfs server xlator.]))
+-if test "x${with_server}" = "xyes" -a "x$enable_gnfs" = "xyes"; then
+-    BUILD_GNFS="yes"
+-fi
+-AM_CONDITIONAL([BUILD_GNFS], [test x$BUILD_GNFS = xyes])
+-AC_SUBST(BUILD_GNFS)
+-dnl end gnfs section
+-
+ dnl Check for userspace-rcu
+ PKG_CHECK_MODULES([URCU], [liburcu-bp], [],
+                   [AC_CHECK_HEADERS([urcu-bp.h],
+@@ -1624,7 +1612,6 @@ echo "EC dynamic support   : $EC_DYNAMIC_SUPPORT"
+ echo "Use memory pools     : $USE_MEMPOOL"
+ echo "Nanosecond m/atimes  : $BUILD_NANOSECOND_TIMESTAMPS"
+ echo "Server components    : $with_server"
+-echo "Legacy gNFS server   : $BUILD_GNFS"
+ echo "IPV6 default         : $with_ipv6_default"
+ echo "Use TIRPC            : $with_libtirpc"
+ echo "With Python          : ${PYTHON_VERSION}"
+diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am
+index f028537..61fd6da 100644
+--- a/extras/LinuxRPM/Makefile.am
++++ b/extras/LinuxRPM/Makefile.am
+@@ -18,7 +18,7 @@ autogen:
+ 	cd ../.. && \
+ 	rm -rf autom4te.cache && \
+ 	./autogen.sh && \
+-	./configure --enable-gnfs --with-previous-options
++	./configure --with-previous-options
+
+ prep:
+ 	$(MAKE) -C ../.. dist;
+@@ -36,7 +36,7 @@ srcrpm:
+ 	mv rpmbuild/SRPMS/* .
+
+ rpms:
+-	rpmbuild --define '_topdir $(shell pwd)/rpmbuild' --with gnfs -bb rpmbuild/SPECS/glusterfs.spec
++	rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bb rpmbuild/SPECS/glusterfs.spec
+ 	mv rpmbuild/RPMS/*/* .
+ + # EPEL-5 does not like new versions of rpmbuild and requires some +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index f6b823d..cb17eaa 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -52,11 +52,6 @@ + # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication + %{?_without_georeplication:%global _without_georeplication --disable-georeplication} + +-# gnfs +-# if you wish to compile an rpm with the legacy gNFS server xlator +-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with gnfs +-%{?_with_gnfs:%global _with_gnfs --enable-gnfs} +- + # ipv6default + # if you wish to compile an rpm with IPv6 default... + # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with ipv6default +@@ -153,7 +148,6 @@ + %if 0%{?_without_server:1} + %global _without_events --disable-events + %global _without_georeplication --disable-georeplication +-%global _with_gnfs %{nil} + %global _without_tiering --disable-tiering + %global _without_ocf --without-ocf + %endif +@@ -525,25 +519,6 @@ is in userspace and easily manageable. + This package provides support to geo-replication. + %endif + +-%if ( 0%{?_with_gnfs:1} ) +-%package gnfs +-Summary: GlusterFS gNFS server +-Requires: %{name}%{?_isa} = %{version}-%{release} +-Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} +-Requires: nfs-utils +- +-%description gnfs +-GlusterFS is a distributed file-system capable of scaling to several +-petabytes. It aggregates various storage bricks over Infiniband RDMA +-or TCP/IP interconnect into one large parallel network file +-system. GlusterFS is one of the most sophisticated file systems in +-terms of features and extensibility. It borrows a powerful concept +-called Translators from GNU Hurd kernel. Much of the code in GlusterFS +-is in user space and easily manageable. 
+- +-This package provides the glusterfs legacy gNFS server xlator +-%endif +- + %package libs + Summary: GlusterFS common libraries + +@@ -659,6 +634,7 @@ Requires: %{name}-api%{?_isa} = %{version}-%{release} + Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} + # lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server + Requires: lvm2 ++Requires: nfs-utils + %if ( 0%{?_with_systemd:1} ) + %{?systemd_requires} + %else +@@ -789,7 +765,6 @@ export LDFLAGS + %{?_with_cmocka} \ + %{?_with_debug} \ + %{?_with_firewalld} \ +- %{?_with_gnfs} \ + %{?_with_tmpfilesdir} \ + %{?_with_tsan} \ + %{?_with_valgrind} \ +@@ -1286,17 +1261,6 @@ exit 0 + %{_bindir}/fusermount-glusterfs + %endif + +-%if ( 0%{?_with_gnfs:1} && 0%{!?_without_server:1} ) +-%files gnfs +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs +- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/server.so +-%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs +-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol +-%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run +-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid +-%endif +- + %files thin-arbiter + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features +@@ -1409,11 +1373,6 @@ exit 0 + %config(noreplace) %{_sysconfdir}/glusterfs + %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol + %exclude %{_sysconfdir}/glusterfs/eventsconfig.json +-%exclude %{_sharedstatedir}/glusterd/nfs/nfs-server.vol +-%exclude %{_sharedstatedir}/glusterd/nfs/run/nfs.pid +-%if ( 0%{?_with_gnfs:1} ) +-%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/* +-%endif + %config(noreplace) %{_sysconfdir}/sysconfig/glusterd + %if ( 0%{_for_fedora_koji_builds} ) + %config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd +@@ -1461,6 +1420,7 @@ exit 0 + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so ++ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs* + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol +@@ -1477,6 +1437,7 @@ exit 0 + + # /var/lib/glusterd, e.g. hookscripts, etc. 
+ %ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info ++%ghost %attr(0600,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/options + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups +@@ -1529,7 +1490,11 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh +-%config(noreplace) %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options ++%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs ++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol ++%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run ++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid ++%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub +@@ -1995,6 +1960,9 @@ fi + %endif + + %changelog ++* Sun Apr 7 2019 Jiffin Tony Thottan <jthottan@redhat.com> ++- DOWNSTREAM ONLY - revert of 83abcb(gnfs in an optional subpackage) ++ + * Sun Apr 7 2019 Soumya Koduri <skoduri@redhat.com> + - As an interim fix add dependency on netstat(/net-tools) for glusterfs-ganesha package (#1395574) + +diff --git a/xlators/Makefile.am b/xlators/Makefile.am +index ef20cbb..be54eb3 100644 +--- a/xlators/Makefile.am ++++ b/xlators/Makefile.am +@@ -1,12 +1,9 @@ +-if BUILD_GNFS +- GNFS_DIR = nfs +-endif + + DIST_SUBDIRS = cluster storage protocol performance debug features \ + mount nfs mgmt system playground meta + + SUBDIRS = cluster storage protocol performance debug features \ +- mount ${GNFS_DIR} mgmt system playground meta ++ mount nfs mgmt system playground meta + + EXTRA_DIST = xlator.sym + +diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c +index 36e9052..3960031 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c +@@ -18,6 +18,8 @@ + #include "glusterd-messages.h" + #include "glusterd-svc-helper.h" + ++static char *nfs_svc_name = "nfs"; ++ + static gf_boolean_t + glusterd_nfssvc_need_start() + { +@@ -41,6 +43,12 @@ glusterd_nfssvc_need_start() + return start; + } + ++int ++glusterd_nfssvc_init(glusterd_svc_t *svc) ++{ ++ return glusterd_svc_init(svc, nfs_svc_name); ++} ++ + static int + glusterd_nfssvc_create_volfile() + { +@@ -49,7 +57,7 @@ glusterd_nfssvc_create_volfile() + }; + glusterd_conf_t *conf = THIS->private; + +- glusterd_svc_build_volfile_path(conf->nfs_svc.name, conf->workdir, filepath, ++ glusterd_svc_build_volfile_path(nfs_svc_name, conf->workdir, filepath, + sizeof(filepath)); + return glusterd_create_global_volfile(build_nfs_graph, filepath, NULL); + } +@@ -60,7 +68,7 @@ glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags) + int ret = -1; + + if (!svc->inited) { +- ret = glusterd_svc_init(svc, "nfs"); ++ ret = glusterd_nfssvc_init(svc); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_NFSSVC, + "Failed to init nfs service"); +@@ -75,13 +83,6 @@ glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags) + if (ret) + goto out; + +- /* not an error, or a (very) soft error at best */ +- if 
(sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) { +- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED, +- "nfs/server.so xlator is not installed"); +- goto out; +- } +- + ret = glusterd_nfssvc_create_volfile(); + if (ret) + goto out; +@@ -155,14 +156,6 @@ glusterd_nfssvc_reconfigure() + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + +- /* not an error, or a (very) soft error at best */ +- if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) { +- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED, +- "nfs/server.so xlator is not installed"); +- ret = 0; +- goto out; +- } +- + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +-- +1.8.3.1 + diff --git a/SOURCES/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch b/SOURCES/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch new file mode 100644 index 0000000..292b3f5 --- /dev/null +++ b/SOURCES/0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch @@ -0,0 +1,110 @@ +From ca3a4ebeddfef8c6909ff5388787a91ee52fd675 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Thu, 15 Dec 2016 17:14:01 +0530 +Subject: [PATCH 091/124] glusterd/gNFS : explicitly set "nfs.disable" to "off" + after 3.2 upgrade + +Gluster NFS was enabled by default for all volumes till 3.1. But 3.2 onwards +for the new volumes it will be disabled by setting "nfs.disable" to "on". +This take patch will take care of existing volume in such a way that if the +option is not configured, it will set "nfs.disable" to "off" during op-version +bump up. + +Also this patch removes the warning message while enabling gluster NFS for +a volume. + +label : DOWNSTREAM ONLY + +Change-Id: Ib199c3180204f917791b4627c58d846750d18a5a +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/93146 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167574 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 16 --------------- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 31 ++++++++++++------------------ + 2 files changed, 12 insertions(+), 35 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index f85958b..92ceb8e 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -1678,22 +1678,6 @@ cli_cmd_volume_set_parse(struct cli_state *state, const char **words, + goto out; + } + } +- if ((!strcmp(key, "nfs.disable")) && (!strcmp(value, "off"))) { +- question = +- "Gluster NFS is being deprecated in favor " +- "of NFS-Ganesha Enter \"yes\" to continue " +- "using Gluster NFS"; +- answer = cli_cmd_get_confirmation(state, question); +- if (GF_ANSWER_NO == answer) { +- gf_log("cli", GF_LOG_ERROR, +- "Operation " +- "cancelled, exiting"); +- *op_errstr = gf_strdup("Aborted by user."); +- ret = -1; +- goto out; +- } +- } +- + if ((strcmp(key, "cluster.brick-multiplex") == 0)) { + question = + "Brick-multiplexing is supported only for " +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 06ea8cf..df8a6ab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ 
b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2502,32 +2502,25 @@ glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo) + + /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade + * from anything below than 3.9.0 to 3.9.x, the value for nfs.disable is +- * set to 'on' for all volumes even if it is explicitly set to 'off' in ++ * set to 'off' for all volumes even if it is explicitly set to 'on' in + * previous version. This change is only applicable to downstream code. +- * Setting nfs.disable to 'on' at op-version bump up flow is the ideal ++ * Setting nfs.disable to 'off' at op-version bump up flow is the ideal + * way here. The same is also applicable for transport.address-family + * where if the transport type is set to tcp then transport.address-family + * is defaulted to 'inet'. + */ + if (conf->op_version >= GD_OP_VERSION_3_9_0) { + if (!(dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 0))) { +- gf_msg(this->name, GF_LOG_INFO, 0, 0, +- "Gluster NFS is" +- " being deprecated in favor of NFS-Ganesha, " +- "hence setting nfs.disable to 'on' for volume " +- "%s. Please re-enable it if requires", +- volinfo->volname); +- } +- +- ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, +- "on"); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, +- "Failed to set " +- "option ' NFS_DISABLE_MAP_KEY ' on " +- "volume %s", +- volinfo->volname); +- goto out; ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY, ++ "off"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set " ++ "option ' NFS_DISABLE_MAP_KEY ' off " ++ "volume %s", ++ volinfo->volname); ++ goto out; ++ } + } + + ret = dict_get_strn(volinfo->dict, "transport.address-family", +-- +1.8.3.1 + diff --git a/SOURCES/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch b/SOURCES/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch new file mode 100644 index 0000000..f4a39aa --- /dev/null +++ b/SOURCES/0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch @@ -0,0 +1,41 @@ +From 82d7c8e057b9e22d13ca89f2a75e65a42878b7c3 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Tue, 2 Apr 2019 10:45:15 +0530 +Subject: [PATCH 092/124] logging: Fix GF_LOG_OCCASSIONALLY API + +GF_LOG_OCCASSIONALLY doesn't log on the first instance rather at every +42nd iterations which isn't effective as in some cases we might not have +the code flow hitting the same log for as many as 42 times and we'd end +up suppressing the log. + +>upstream fix : https://review.gluster.org/#/c/glusterfs/+/22475/ +>Fixes: bz#1694925 +>Change-Id: Iee293281d25a652b64df111d59b13de4efce06fa +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1691620 +Change-Id: Iee293281d25a652b64df111d59b13de4efce06fa +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167822 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/logging.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/logging.h b/libglusterfs/src/glusterfs/logging.h +index c81e19b..3655b1d 100644 +--- a/libglusterfs/src/glusterfs/logging.h ++++ b/libglusterfs/src/glusterfs/logging.h +@@ -300,7 +300,7 @@ _gf_log_eh(const char *function, const char *fmt, ...) + + /* Log once in GF_UNIVERSAL_ANSWER times */ + #define GF_LOG_OCCASIONALLY(var, args...) 
\ +- if (!(var++ % GF_UNIVERSAL_ANSWER)) { \ ++ if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \ + gf_log(args); \ + } + +-- +1.8.3.1 + diff --git a/SOURCES/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch b/SOURCES/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch new file mode 100644 index 0000000..7c3782c --- /dev/null +++ b/SOURCES/0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch @@ -0,0 +1,106 @@ +From 713f55b4a5cc582d06a10a1c9a0cdf71a4636a10 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 15 Apr 2019 07:57:40 +0530 +Subject: [PATCH 093/124] glusterd: Change op-version of cache-invalidation in + quick-read + +In upstream cache-invalidation option in quick read was introduced with +4.0. There are two problems with it: + +1. The key being made duplicate to md-cache xlator, so setting the same +option actually enables this feature on both md-cache and quick-read. +2. As the op-version tagged to this key was GD_OP_VERSION_4_0_0, with a RHGS +3.5 cluster when a md-cache profile is set to a particular volume old +client fails to mount which is wrong. + +Solving 1 with out breaking backward compatibility in upstream is quite hard. +This patch addresses both the problems in downstream by (a) changing the +op-version to GD_OP_VERSION_6_0_0 and (b) renaming this key to +quick-read-cache-invalidation. The fix is currently made downstream only till a +proper solution is identified in upstream. + +Label: DOWNSTREAM ONLY +BUG: 1697820 +Change-Id: I1251424576d6ebbdb2a761400fd20f0aff0c80a2 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167823 +Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/quick-read-with-upcall.t | 1 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 +++--- + xlators/performance/quick-read/src/quick-read.c | 11 ++++++----- + 3 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/tests/basic/quick-read-with-upcall.t b/tests/basic/quick-read-with-upcall.t +index 318e93a..647dacf 100644 +--- a/tests/basic/quick-read-with-upcall.t ++++ b/tests/basic/quick-read-with-upcall.t +@@ -58,6 +58,7 @@ EXPECT "$D0" cat $M1/test1.txt + sleep 60 + EXPECT "$D1" cat $M1/test1.txt + ++TEST $CLI volume set $V0 performance.quick-read-cache-invalidation on + TEST $CLI volume set $V0 performance.cache-invalidation on + + TEST write_to "$M0/test2.txt" "$D0" +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index a877805..42ca9bb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1690,10 +1690,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .option = "cache-timeout", + .op_version = 1, + .flags = VOLOPT_FLAG_CLIENT_OPT}, +- {.key = "performance.cache-invalidation", ++ {.key = "performance.quick-read-cache-invalidation", + .voltype = "performance/quick-read", +- .option = "cache-invalidation", +- .op_version = GD_OP_VERSION_4_0_0, ++ .option = "quick-read-cache-invalidation", ++ .op_version = GD_OP_VERSION_6_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = "performance.ctime-invalidation", + .voltype = "performance/quick-read", +diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c +index 
244e8c8..59553c0 100644
+--- a/xlators/performance/quick-read/src/quick-read.c
++++ b/xlators/performance/quick-read/src/quick-read.c
+@@ -1218,8 +1218,8 @@ qr_reconfigure(xlator_t *this, dict_t *options)
+
+ GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out);
+
+- GF_OPTION_RECONF("cache-invalidation", conf->qr_invalidation, options, bool,
+- out);
++ GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation,
++ options, bool, out);
+
+ GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options,
+ bool, out);
+@@ -1369,7 +1369,8 @@ qr_init(xlator_t *this)
+
+ GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out);
+
+- GF_OPTION_INIT("cache-invalidation", conf->qr_invalidation, bool, out);
++ GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool,
++ out);
+
+ GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out);
+ if (!check_cache_size_ok(this, conf->cache_size)) {
+@@ -1615,10 +1616,10 @@ struct volume_options qr_options[] = {
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {
+- .key = {"cache-invalidation"},
++ .key = {"quick-read-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+- .op_version = {GD_OP_VERSION_4_0_0},
++ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+--
+1.8.3.1
+
diff --git a/SOURCES/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch b/SOURCES/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
new file mode 100644
index 0000000..727f8b5
--- /dev/null
+++ b/SOURCES/0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch
@@ -0,0 +1,45 @@
+From dab37dc78d21762ac3379ad505f8fc4ec996d0f7 Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Tue, 9 Apr 2019 14:58:29 +0530
+Subject: [PATCH 094/124] glusterd: load ctime in the client graph only if it's
+ not turned off
+
+Considering ctime is a client-side feature, we can't blindly load the ctime
+xlator into the client graph if it's explicitly turned off; that would
+result in a backward-compatibility issue where an old client can't mount
+a volume configured on a server that has the ctime feature. 
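+
+A minimal sketch of the resulting guard in client_graph_builder() (this is
+just the post-patch shape of the hunk below, with illustrative comments):
+
+    /* Add the utime xlator to the client graph only when the cluster
+     * op-version permits it and "features.ctime" in set_dict does not
+     * evaluate to boolean true (dict_get_str_boolean() returns the
+     * supplied default, _gf_false here, when the key is absent). */
+    if (conf->op_version >= GD_OP_VERSION_5_0 &&
+        !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) {
+        xl = volgen_graph_add(graph, "features/utime", volname);
+        if (!xl) {
+            ret = -1;
+            goto out;
+        }
+    }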
+ +> Upstream patch : https://review.gluster.org/#/c/glusterfs/+/22536/ +>Fixes: bz#1697907 +>Change-Id: I6ae7b96d056073aa6746de9a449cf319786d45cc +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1697820 +Change-Id: I6ae7b96d056073aa6746de9a449cf319786d45cc +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167815 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index ed24858..012f38e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4358,7 +4358,8 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + } + } + +- if (conf->op_version >= GD_OP_VERSION_5_0) { ++ if (conf->op_version >= GD_OP_VERSION_5_0 && ++ !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) { + xl = volgen_graph_add(graph, "features/utime", volname); + if (!xl) { + ret = -1; +-- +1.8.3.1 + diff --git a/SOURCES/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch b/SOURCES/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch new file mode 100644 index 0000000..0e17c44 --- /dev/null +++ b/SOURCES/0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch @@ -0,0 +1,204 @@ +From cca418b78ec976aa69eacd56b0e6127ea7e3dd26 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Thu, 4 Apr 2019 15:31:56 +0530 +Subject: [PATCH 095/124] cluster/afr: Remove local from owners_list on failure + of lock-acquisition + + Backport of https://review.gluster.org/c/glusterfs/+/22515 + +When eager-lock lock acquisition fails because of say network failures, the +local is not being removed from owners_list, this leads to accumulation of +waiting frames and the application will hang because the waiting frames are +under the assumption that another transaction is in the process of acquiring +lock because owner-list is not empty. Handled this case as well in this patch. +Added asserts to make it easier to find these problems in future. + +Change-Id: I3101393265e9827755725b1f2d94a93d8709e923 +fixes: bz#1688395 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167859 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1696599-io-hang.t | 47 ++++++++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 8 ++--- + xlators/cluster/afr/src/afr-lk-common.c | 1 - + xlators/cluster/afr/src/afr-transaction.c | 19 +++++------- + xlators/cluster/afr/src/afr.h | 4 +-- + 5 files changed, 61 insertions(+), 18 deletions(-) + create mode 100755 tests/bugs/replicate/bug-1696599-io-hang.t + +diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t +new file mode 100755 +index 0000000..869cdb9 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1696599-io-hang.t +@@ -0,0 +1,47 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../fileio.rc ++ ++#Tests that local structures in afr are removed from granted/blocked list of ++#locks when inodelk fails on all bricks ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3} ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.client-io-threads off ++TEST $CLI volume set $V0 delay-gen locks ++TEST $CLI volume set $V0 delay-gen.delay-duration 5000000 ++TEST $CLI volume set $V0 delay-gen.delay-percentage 100 ++TEST $CLI volume set $V0 delay-gen.enable finodelk ++ ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++TEST $GFS -s $H0 --volfile-id $V0 $M0 ++TEST touch $M0/file ++#Trigger write and stop bricks so inodelks fail on all bricks leading to ++#lock failure condition ++echo abc >> $M0/file & ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume reset $V0 delay-gen ++wait ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 ++#Test that only one write succeeded, this tests that delay-gen worked as ++#expected ++echo abc >> $M0/file ++EXPECT "abc" cat $M0/file ++ ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 45b96e3..47a5d3a 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5763,6 +5763,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) + afr_private_t *priv = NULL; + + priv = this->private; ++ INIT_LIST_HEAD(&local->transaction.wait_list); ++ INIT_LIST_HEAD(&local->transaction.owner_list); ++ INIT_LIST_HEAD(&local->ta_waitq); ++ INIT_LIST_HEAD(&local->ta_onwireq); + ret = afr_internal_lock_init(&local->internal_lock, priv->child_count); + if (ret < 0) + goto out; +@@ -5800,10 +5804,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) + goto out; + + ret = 0; +- INIT_LIST_HEAD(&local->transaction.wait_list); +- INIT_LIST_HEAD(&local->transaction.owner_list); +- INIT_LIST_HEAD(&local->ta_waitq); +- INIT_LIST_HEAD(&local->ta_onwireq); + out: + return ret; + } +diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c +index 4091671..bc8eabe 100644 +--- a/xlators/cluster/afr/src/afr-lk-common.c ++++ b/xlators/cluster/afr/src/afr-lk-common.c +@@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this) + int_lock->lk_call_count = call_count; + + if (!call_count) { +- GF_ASSERT(!local->transaction.do_eager_unlock); + gf_msg_trace(this->name, 0, "No internal locks unlocked"); + int_lock->lock_cbk(frame, this); + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 229820b..15f3a7e 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this) + } + local->transaction.unwind(frame, this); + ++ GF_ASSERT(list_empty(&local->transaction.owner_list)); ++ GF_ASSERT(list_empty(&local->transaction.wait_list)); + AFR_STACK_DESTROY(frame); + + return 0; +@@ -393,7 +395,7 @@ 
afr_lock_fail_shared(afr_local_t *local, struct list_head *list) + } + + static void +-afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) ++afr_handle_lock_acquire_failure(afr_local_t *local) + { + struct list_head shared; + afr_lock_t *lock = NULL; +@@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) + afr_lock_fail_shared(local, &shared); + local->transaction.do_eager_unlock = _gf_true; + out: +- if (locked) { +- local->internal_lock.lock_cbk = afr_transaction_done; +- afr_unlock(local->transaction.frame, local->transaction.frame->this); +- } else { +- afr_transaction_done(local->transaction.frame, +- local->transaction.frame->this); +- } ++ local->internal_lock.lock_cbk = afr_transaction_done; ++ afr_unlock(local->transaction.frame, local->transaction.frame->this); + } + + call_frame_t * +@@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this) + failure_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + if (failure_count == priv->child_count) { +- afr_handle_lock_acquire_failure(local, _gf_true); ++ afr_handle_lock_acquire_failure(local); + return 0; + } else { + lock = &local->inode_ctx->lock[local->transaction.type]; +@@ -2092,7 +2089,7 @@ err: + local->op_ret = -1; + local->op_errno = op_errno; + +- afr_handle_lock_acquire_failure(local, _gf_true); ++ afr_handle_lock_acquire_failure(local); + + if (xdata_req) + dict_unref(xdata_req); +@@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this) + } else { + lock = &local->inode_ctx->lock[local->transaction.type]; + if (local->internal_lock.lock_op_ret < 0) { +- afr_handle_lock_acquire_failure(local, _gf_false); ++ afr_handle_lock_acquire_failure(local); + } else { + lock->event_generation = local->event_generation; + afr_changelog_pre_op(frame, this); +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 2cc3797..e731cfa 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1091,8 +1091,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd); + #define AFR_FRAME_INIT(frame, op_errno) \ + ({ \ + frame->local = mem_get0(THIS->local_pool); \ +- if (afr_local_init(frame->local, THIS->private, &op_errno)) { \ +- afr_local_cleanup(frame->local, THIS); \ ++ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \ ++ afr_local_cleanup(frame->local, frame->this); \ + mem_put(frame->local); \ + frame->local = NULL; \ + }; \ +-- +1.8.3.1 + diff --git a/SOURCES/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch b/SOURCES/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch new file mode 100644 index 0000000..cc2a448 --- /dev/null +++ b/SOURCES/0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch @@ -0,0 +1,94 @@ +From 3bcffadcd77eebe6b4f7e5015ad41ec7c1d1ec3e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Thu, 11 Apr 2019 20:38:53 +0530 +Subject: [PATCH 096/124] core: Brick is not able to detach successfully in + brick_mux environment + +Problem: In brick_mux environment, while volumes are stopped in a + loop bricks are not detached successfully. Brick's are not + detached because xprtrefcnt has not become 0 for detached brick. 
+          At the time of initiating the brick detach process, server_notify
+          saves xprtrefcnt on the detached brick, and once the counter becomes
+          0, server_rpc_notify spawns a server_graph_janitor_threads to clean
+          up brick resources. xprtrefcnt has not become 0 because the
+          socket framework is not working, as 0 was assigned as a socket fd.
+          In commit dc25d2c1eeace91669052e3cecc083896e7329b2
+          there was a change in changelog fini to close htime_fd if htime_fd is not
+          negative; since htime_fd defaults to 0, fd 0 was closed as well.
+
+Solution: Initialize htime_fd to -1 just after allocating changelog_priv
+          via GF_CALLOC
+
+> Fixes: bz#1699025
+> Change-Id: I5f7ca62a0eb1c0510c3e9b880d6ab8af8d736a25
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit b777d83001d8006420b6c7d2d88fe68950aa7e00)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22549/)
+
+Fixes: bz#1698919
+Change-Id: Ib5b74aa0818235625f8aac7c23d4daa599da3fd1
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167826
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1699025-brick-mux-detach-brick-fd-issue.t | 33 ++++++++++++++++++++++
+ xlators/features/changelog/src/changelog.c | 1 +
+ 2 files changed, 34 insertions(+)
+ create mode 100644 tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+
+diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+new file mode 100644
+index 0000000..1acbaa8
+--- /dev/null
++++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
+@@ -0,0 +1,33 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../cluster.rc
++
++function count_brick_processes {
++ pgrep glusterfsd | wc -l
++}
++
++cleanup
++
++#bug-1444596 - validating brick mux
++
++TEST glusterd
++TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
++TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
++
++TEST $CLI volume set all cluster.brick-multiplex on
++
++TEST $CLI volume start $V0
++TEST $CLI volume start $V1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
++EXPECT 1 count_brick_processes
++
++TEST $CLI volume stop $V1
++# When the brick daemon is initialized it always keeps the standard
++# fd's (0, 1, 2) open, so after stopping 1 volume those fd's should
++# still be open
++nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
++TEST [ $((nofds)) -eq 3 ]
++
++cleanup
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index 1f22a97..d9025f3 100644
+--- a/xlators/features/changelog/src/changelog.c
++++ b/xlators/features/changelog/src/changelog.c
+@@ -2740,6 +2740,7 @@ init(xlator_t *this)
+ GF_ATOMIC_INIT(priv->clntcnt, 0);
+ GF_ATOMIC_INIT(priv->xprtcnt, 0);
+ INIT_LIST_HEAD(&priv->xprt_list);
++ priv->htime_fd = -1;
+
+ ret = changelog_init_options(this, priv);
+ if (ret)
+--
+1.8.3.1
+
diff --git a/SOURCES/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch b/SOURCES/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
new file mode 100644
index 0000000..42f1e47
--- /dev/null
+++ b/SOURCES/0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch
@@ -0,0 +1,61 @@
+From 302f3f87c9aa00c17ec3b49a81c8a4441d2bdf5f Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Mon, 15 Apr 2019 10:01:40 +0530
+Subject: [PATCH 097/124] glusterd/tier: while doing an attach tier, the self
+ heal daemon is not getting started
+
+Problem: on a replicated volume, if attach tier is done,
+the shd will be restarted. But here the restart fails because of the
+graph not getting generated properly. The dict which is used for graph
+creation doesn't get the values copied properly in prepare_shd_volume_options();
+glusterd_prepare_shd_volume_options_for_tier() fails and skips the copy.
+
+This patch reverts the changes back to the way it was in 3.4 to
+help fix the issue. Using the old dict_set_str works. 
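+
+For reference, the restored pattern (shown in the hunk below) looks like
+this; the exact key string returned by volgen_get_shd_key() depends on the
+sub-volume type, and the comments here are illustrative:
+
+    /* volgen_get_shd_key() can return NULL for a tier sub-volume type
+     * that has no self-heal daemon option (e.g. plain distribute); in
+     * that case the copy is simply skipped instead of being treated
+     * as a failure. */
+    key = volgen_get_shd_key (volinfo->tier_info.cold_type);
+    if (key) {
+        ret = dict_set_str (set_dict, key, "enable");
+        if (ret)
+            goto out;
+    }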
+
+label: DOWNSTREAM ONLY
+
+Change-Id: I21534ca177511e018ba76886e899b3b1a4ac4716
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/167825
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-volgen.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+index 012f38e..1f53beb 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
+@@ -4895,14 +4895,21 @@ glusterd_prepare_shd_volume_options_for_tier(glusterd_volinfo_t *volinfo,
+ dict_t *set_dict)
+ {
+ int ret = -1;
++ char *key = NULL;
+
+- ret = volgen_set_shd_key_enable(set_dict, volinfo->tier_info.cold_type);
+- if (ret)
+- goto out;
++ key = volgen_get_shd_key (volinfo->tier_info.cold_type);
++ if (key) {
++ ret = dict_set_str (set_dict, key, "enable");
++ if (ret)
++ goto out;
++ }
+
+- ret = volgen_set_shd_key_enable(set_dict, volinfo->tier_info.hot_type);
+- if (ret)
+- goto out;
++ key = volgen_get_shd_key (volinfo->tier_info.hot_type);
++ if (key) {
++ ret = dict_set_str (set_dict, key, "enable");
++ if (ret)
++ goto out;
++ }
+ out:
+ return ret;
+ }
+--
+1.8.3.1
+
diff --git a/SOURCES/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch b/SOURCES/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
new file mode 100644
index 0000000..64d198d
--- /dev/null
+++ b/SOURCES/0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch
@@ -0,0 +1,4617 @@
+From 2cede2b87fb3e3e0673be9cf67e7d6eec3f7879c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 25 Feb 2019 10:05:32 +0530
+Subject: [PATCH 098/124] mgmt/shd: Implement multiplexing in self heal daemon
+
+Problem:
+
+The shd daemon is per node, which means it creates a graph
+with all volumes on it. While this is great for utilizing
+resources, it is not so good in terms of performance and manageability.
+
+This is because self-heal daemons don't have the capability to automatically
+reconfigure their graphs. So each time any configuration
+change happens to a volume (replicate/disperse), we need to restart
+shd to bring the changes into the graph.
+
+Because of this, all ongoing heals for all other volumes have to be
+stopped in the middle and restarted all over again.
+
+Solution:
+
+This change makes shd a per-volume daemon, so that the graph
+will be generated for each volume.
+
+When we want to start/reconfigure shd for a volume, we first search
+for an existing shd running on the node; if there is none, we will
+start a new process. If a shd daemon is already running, then
+we will simply detach the volume's graph and reattach the updated
+graph for the volume. This won't touch any of the ongoing operations
+for any other volumes on the shd daemon. 
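+
+In rough pseudo-C, the volfile handling added to glusterfsd-mgmt.c
+(mgmt_process_volfile() in the diff below) picks between the three cases
+like this (lookup_by_volfile_id() is an illustrative helper, not a real
+function; locking and error handling are elided):
+
+    glusterfs_compute_sha256(volfile, size, sha256_hash);
+    volfile_obj = lookup_by_volfile_id(ctx, volfile_id);
+    if (volfile_obj && !memcmp(sha256_hash, volfile_obj->volfile_checksum,
+                               sizeof(volfile_obj->volfile_checksum)))
+        return 0;            /* no change in volfile, nothing to do */
+    if (!volfile_obj)        /* first volfile for this volume: attach it */
+        return glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+                                                  sha256_hash);
+    /* volume known but volfile changed: swap in the regenerated graph,
+     * leaving every other volume's graph (and ongoing heals) untouched */
+    return glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+                                             sha256_hash);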
+ +Example of an shd graph when it is per volume + + graph + ----------------------- + | debug-iostat | + ----------------------- + / | \ + / | \ + --------- --------- ---------- + | AFR-1 | | AFR-2 | | AFR-3 | + -------- --------- ---------- + +A running shd daemon with 3 volumes will be like--> + + graph + ----------------------- + | debug-iostat | + ----------------------- + / | \ + / | \ + ------------ ------------ ------------ + | volume-1 | | volume-2 | | volume-3 | + ------------ ------------ ------------ + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22075 + +>Change-Id: Idcb2698be3eeb95beaac47125565c93370afbd99 +>fixes: bz#1659708 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I38b6e2bb62edd818e460ccf6e9740364dc676876 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167830 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfsd/src/glusterfsd-messages.h | 3 +- + glusterfsd/src/glusterfsd-mgmt.c | 238 ++++++- + glusterfsd/src/glusterfsd.c | 18 - + libglusterfs/src/defaults-tmpl.c | 19 +- + libglusterfs/src/glusterfs/glusterfs.h | 7 + + libglusterfs/src/glusterfs/libglusterfs-messages.h | 4 +- + libglusterfs/src/glusterfs/xlator.h | 3 + + libglusterfs/src/graph.c | 451 +++++++++++++ + libglusterfs/src/graph.y | 3 + + libglusterfs/src/libglusterfs.sym | 5 + + libglusterfs/src/statedump.c | 3 +- + libglusterfs/src/xlator.c | 16 + + rpc/rpc-lib/src/protocol-common.h | 2 + + tests/basic/glusterd/heald.t | 49 +- + .../reset-brick-and-daemons-follow-quorum.t | 8 +- + tests/volume.rc | 6 +- + xlators/mgmt/glusterd/src/Makefile.am | 6 +- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 42 ++ + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h | 4 +- + xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-handler.c | 11 +- + xlators/mgmt/glusterd/src/glusterd-handshake.c | 21 + + xlators/mgmt/glusterd/src/glusterd-mem-types.h | 1 + + xlators/mgmt/glusterd/src/glusterd-messages.h | 4 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 84 ++- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 140 ++++ + .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 45 ++ + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 540 ++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-shd-svc.h | 17 +- + xlators/mgmt/glusterd/src/glusterd-sm.c | 12 +- + xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-statedump.c | 3 - + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 715 ++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-svc-helper.h | 40 +- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 246 +++++-- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 27 + + xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 137 +++- + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 60 +- + xlators/mgmt/glusterd/src/glusterd-volgen.h | 11 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 8 +- + xlators/mgmt/glusterd/src/glusterd.c | 12 +- + xlators/mgmt/glusterd/src/glusterd.h | 30 +- + xlators/protocol/client/src/client.c | 31 +- + 47 files changed, 2810 insertions(+), 291 deletions(-) + create mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c + create 
mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h + +diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h +index 602cd9e..94312a5 100644 +--- a/glusterfsd/src/glusterfsd-messages.h ++++ b/glusterfsd/src/glusterfsd-messages.h +@@ -35,6 +35,7 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3, + glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30, + glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33, + glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36, +- glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39); ++ glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39, ++ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42); + + #endif /* !_GLUSTERFSD_MESSAGES_H_ */ +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index a6c3db5..a89c980 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -48,7 +48,20 @@ int + glusterfs_graph_unknown_options(glusterfs_graph_t *graph); + int + emancipate(glusterfs_ctx_t *ctx, int ret); ++int ++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, ++ char *volfile_id, char *checksum); ++int ++glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, ++ gf_volfile_t *volfile_obj, char *checksum); ++int ++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, ++ char *volfile_id, char *checksum); ++int ++glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj); + ++gf_boolean_t ++mgmt_is_multiplexed_daemon(char *name); + int + mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + { +@@ -62,6 +75,96 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + } + + int ++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id) ++{ ++ glusterfs_ctx_t *ctx = NULL; ++ int ret = 0; ++ FILE *tmpfp = NULL; ++ gf_volfile_t *volfile_obj = NULL; ++ gf_volfile_t *volfile_tmp = NULL; ++ char sha256_hash[SHA256_DIGEST_LENGTH] = { ++ 0, ++ }; ++ int tmp_fd = -1; ++ char template[] = "/tmp/glfs.volfile.XXXXXX"; ++ ++ glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash); ++ ctx = THIS->ctx; ++ LOCK(&ctx->volfile_lock); ++ { ++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) ++ { ++ if (!strcmp(volfile_id, volfile_obj->vol_id)) { ++ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum, ++ sizeof(volfile_obj->volfile_checksum))) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40, ++ "No change in volfile, continuing"); ++ goto out; ++ } ++ volfile_tmp = volfile_obj; ++ break; ++ } ++ } ++ ++ /* coverity[secure_temp] mkstemp uses 0600 as the mode */ ++ tmp_fd = mkstemp(template); ++ if (-1 == tmp_fd) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39, ++ "Unable to create temporary file: %s", template); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Calling unlink so that when the file is closed or program ++ * terminates the temporary file is deleted. 
++ */ ++ ret = sys_unlink(template); ++ if (ret < 0) { ++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39, ++ "Unable to delete temporary file: %s", template); ++ ret = 0; ++ } ++ ++ tmpfp = fdopen(tmp_fd, "w+b"); ++ if (!tmpfp) { ++ ret = -1; ++ goto unlock; ++ } ++ ++ fwrite(volfile, size, 1, tmpfp); ++ fflush(tmpfp); ++ if (ferror(tmpfp)) { ++ ret = -1; ++ goto unlock; ++ } ++ ++ if (!volfile_tmp) { ++ /* There is no checksum in the list, which means simple attach ++ * the volfile ++ */ ++ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id, ++ sha256_hash); ++ goto unlock; ++ } ++ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj, ++ sha256_hash); ++ if (ret < 0) { ++ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!"); ++ } ++ } ++unlock: ++ UNLOCK(&ctx->volfile_lock); ++out: ++ if (tmpfp) ++ fclose(tmpfp); ++ else if (tmp_fd != -1) ++ sys_close(tmp_fd); ++ return ret; ++} ++ ++int + mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data) + { + return 0; +@@ -966,6 +1069,110 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + } + + int ++glusterfs_handle_svc_attach(rpcsvc_request_t *req) ++{ ++ int32_t ret = -1; ++ gd1_mgmt_brick_op_req xlator_req = { ++ 0, ++ }; ++ xlator_t *this = NULL; ++ glusterfs_ctx_t *ctx = NULL; ++ ++ GF_ASSERT(req); ++ this = THIS; ++ GF_ASSERT(this); ++ ++ ctx = this->ctx; ++ ret = xdr_to_generic(req->msg[0], &xlator_req, ++ (xdrproc_t)xdr_gd1_mgmt_brick_op_req); ++ ++ if (ret < 0) { ++ /*failed to decode msg;*/ ++ req->rpc_err = GARBAGE_ARGS; ++ goto out; ++ } ++ gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, ++ "received attach " ++ "request for volfile-id=%s", ++ xlator_req.name); ++ ret = 0; ++ ++ if (ctx->active) { ++ ret = mgmt_process_volfile(xlator_req.input.input_val, ++ xlator_req.input.input_len, xlator_req.name); ++ } else { ++ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, ++ "got attach for %s but no active graph", xlator_req.name); ++ } ++out: ++ if (xlator_req.input.input_val) ++ free(xlator_req.input.input_val); ++ if (xlator_req.name) ++ free(xlator_req.name); ++ glusterfs_translator_info_response_send(req, ret, NULL, NULL); ++ return 0; ++} ++ ++int ++glusterfs_handle_svc_detach(rpcsvc_request_t *req) ++{ ++ gd1_mgmt_brick_op_req xlator_req = { ++ 0, ++ }; ++ ssize_t ret; ++ glusterfs_ctx_t *ctx = NULL; ++ gf_volfile_t *volfile_obj = NULL; ++ gf_volfile_t *volfile_tmp = NULL; ++ ++ ret = xdr_to_generic(req->msg[0], &xlator_req, ++ (xdrproc_t)xdr_gd1_mgmt_brick_op_req); ++ if (ret < 0) { ++ req->rpc_err = GARBAGE_ARGS; ++ return -1; ++ } ++ ctx = glusterfsd_ctx; ++ ++ LOCK(&ctx->volfile_lock); ++ { ++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) ++ { ++ if (!strcmp(xlator_req.name, volfile_obj->vol_id)) { ++ volfile_tmp = volfile_obj; ++ break; ++ } ++ } ++ ++ if (!volfile_tmp) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_41, ++ "can't detach %s - not found", xlator_req.name); ++ /* ++ * Used to be -ENOENT. However, the caller asked us to ++ * make sure it's down and if it's already down that's ++ * good enough. ++ */ ++ ret = 0; ++ goto out; ++ } ++ ret = glusterfs_process_svc_detach(ctx, volfile_tmp); ++ if (ret) { ++ UNLOCK(&ctx->volfile_lock); ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_41, ++ "Could not detach " ++ "old graph. 
Aborting the reconfiguration operation"); ++ goto out; ++ } ++ } ++ UNLOCK(&ctx->volfile_lock); ++out: ++ glusterfs_terminate_response_send(req, ret); ++ free(xlator_req.name); ++ xlator_req.name = NULL; ++ ++ return 0; ++} ++ ++int + glusterfs_handle_dump_metrics(rpcsvc_request_t *req) + { + int32_t ret = -1; +@@ -1849,6 +2056,13 @@ rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = { + + [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", GLUSTERD_DUMP_METRICS, + glusterfs_handle_dump_metrics, NULL, 0, DRC_NA}, ++ ++ [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", GLUSTERD_SVC_ATTACH, ++ glusterfs_handle_svc_attach, NULL, 0, DRC_NA}, ++ ++ [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", GLUSTERD_SVC_DETACH, ++ glusterfs_handle_svc_detach, NULL, 0, DRC_NA}, ++ + }; + + struct rpcsvc_program glusterfs_mop_prog = { +@@ -1996,14 +2210,17 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + + volfile: +- ret = 0; + size = rsp.op_ret; ++ volfile_id = frame->local; ++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { ++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id); ++ goto post_graph_mgmt; ++ } + ++ ret = 0; + glusterfs_compute_sha256((const unsigned char *)rsp.spec, size, + sha256_hash); + +- volfile_id = frame->local; +- + LOCK(&ctx->volfile_lock); + { + locked = 1; +@@ -2105,6 +2322,7 @@ volfile: + } + + INIT_LIST_HEAD(&volfile_tmp->volfile_list); ++ volfile_tmp->graph = ctx->active; + list_add(&volfile_tmp->volfile_list, &ctx->volfile_list); + snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s", + volfile_id); +@@ -2116,6 +2334,7 @@ volfile: + + locked = 0; + ++post_graph_mgmt: + if (!is_mgmt_rpc_reconnect) { + need_emancipate = 1; + glusterfs_mgmt_pmap_signin(ctx); +@@ -2269,10 +2488,21 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx) + { + xlator_t *server_xl = NULL; + xlator_list_t *trav; +- int ret; ++ gf_volfile_t *volfile_obj = NULL; ++ int ret = 0; + + LOCK(&ctx->volfile_lock); + { ++ if (ctx->active && ++ mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { ++ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) ++ { ++ ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id); ++ } ++ UNLOCK(&ctx->volfile_lock); ++ return ret; ++ } ++ + if (ctx->active) { + server_xl = ctx->active->first; + if (strcmp(server_xl->type, "protocol/server") != 0) { +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index c983882..3aa89ca 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2589,24 +2589,6 @@ out: + #endif + + int +-glusterfs_graph_fini(glusterfs_graph_t *graph) +-{ +- xlator_t *trav = NULL; +- +- trav = graph->first; +- +- while (trav) { +- if (trav->init_succeeded) { +- trav->fini(trav); +- trav->init_succeeded = 0; +- } +- trav = trav->next; +- } +- +- return 0; +-} +- +-int + glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp) + { + glusterfs_graph_t *graph = NULL; +diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c +index 5bf64e8..82e7f78 100644 +--- a/libglusterfs/src/defaults-tmpl.c ++++ b/libglusterfs/src/defaults-tmpl.c +@@ -127,6 +127,12 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) 
+ GF_UNUSED int ret = 0; + xlator_t *victim = data; + ++ glusterfs_graph_t *graph = NULL; ++ ++ GF_VALIDATE_OR_GOTO("notify", this, out); ++ graph = this->graph; ++ GF_VALIDATE_OR_GOTO(this->name, graph, out); ++ + switch (event) { + case GF_EVENT_PARENT_UP: + case GF_EVENT_PARENT_DOWN: { +@@ -159,6 +165,17 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + xlator_notify(parent->xlator, event, this, NULL); + parent = parent->next; + } ++ ++ if (event == GF_EVENT_CHILD_DOWN && ++ !(this->ctx && this->ctx->master) && (graph->top == this)) { ++ /* Make sure this is not a daemon with master xlator */ ++ pthread_mutex_lock(&graph->mutex); ++ { ++ graph->used = 0; ++ pthread_cond_broadcast(&graph->child_down_cond); ++ } ++ pthread_mutex_unlock(&graph->mutex); ++ } + } break; + case GF_EVENT_UPCALL: { + xlator_list_t *parent = this->parents; +@@ -205,7 +222,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + * nothing to do with readability. + */ + } +- ++out: + return 0; + } + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 7c6af09..deec5ba 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -590,6 +590,10 @@ struct _glusterfs_graph { + int used; /* Should be set when fuse gets + first CHILD_UP */ + uint32_t volfile_checksum; ++ void *last_xl; /* Stores the last xl of the graph, as of now only populated ++ in client multiplexed code path */ ++ pthread_mutex_t mutex; ++ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */ + }; + typedef struct _glusterfs_graph glusterfs_graph_t; + +@@ -732,6 +736,7 @@ typedef struct { + char volfile_checksum[SHA256_DIGEST_LENGTH]; + char vol_id[NAME_MAX + 1]; + struct list_head volfile_list; ++ glusterfs_graph_t *graph; + + } gf_volfile_t; + +@@ -815,4 +820,6 @@ gf_free_mig_locks(lock_migration_info_t *locks); + + int + glusterfs_read_secure_access_file(void); ++int ++glusterfs_graph_fini(glusterfs_graph_t *graph); + #endif /* _GLUSTERFS_H */ +diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h +index 1b72f6d..ea2aa60 100644 +--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h ++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h +@@ -109,6 +109,8 @@ GLFS_MSGID( + LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST, + LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED, + LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, +- LG_MSG_XXH64_TO_GFID_FAILED); ++ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE, ++ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED, ++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED); + + #endif /* !_LG_MESSAGES_H_ */ +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 7002657..06152ec 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1087,4 +1087,7 @@ handle_default_options(xlator_t *xl, dict_t *options); + + void + gluster_graph_take_reference(xlator_t *tree); ++ ++gf_boolean_t ++mgmt_is_multiplexed_daemon(char *name); + #endif /* _XLATOR_H */ +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index bb5e67a..a492dd8 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -114,6 +114,53 @@ out: + return cert_depth; + } + ++xlator_t * ++glusterfs_get_last_xlator(glusterfs_graph_t *graph) ++{ ++ xlator_t *trav = 
graph->first; ++ if (!trav) ++ return NULL; ++ ++ while (trav->next) ++ trav = trav->next; ++ ++ return trav; ++} ++ ++xlator_t * ++glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl) ++{ ++ xlator_list_t *unlink = NULL; ++ xlator_list_t *prev = NULL; ++ xlator_list_t **tmp = NULL; ++ xlator_t *next_child = NULL; ++ xlator_t *xl = NULL; ++ ++ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) { ++ if ((*tmp)->xlator == cxl) { ++ unlink = *tmp; ++ *tmp = (*tmp)->next; ++ if (*tmp) ++ next_child = (*tmp)->xlator; ++ break; ++ } ++ prev = *tmp; ++ } ++ ++ if (!prev) ++ xl = pxl; ++ else if (prev->xlator) ++ xl = prev->xlator->graph->last_xl; ++ ++ if (xl) ++ xl->next = next_child; ++ if (next_child) ++ next_child->prev = xl; ++ ++ GF_FREE(unlink); ++ return next_child; ++} ++ + int + glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl) + { +@@ -1092,6 +1139,8 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph) + ret = xlator_tree_free_memacct(graph->first); + + list_del_init(&graph->list); ++ pthread_mutex_destroy(&graph->mutex); ++ pthread_cond_destroy(&graph->child_down_cond); + GF_FREE(graph); + + return ret; +@@ -1134,6 +1183,25 @@ out: + } + + int ++glusterfs_graph_fini(glusterfs_graph_t *graph) ++{ ++ xlator_t *trav = NULL; ++ ++ trav = graph->first; ++ ++ while (trav) { ++ if (trav->init_succeeded) { ++ trav->cleanup_starting = 1; ++ trav->fini(trav); ++ trav->init_succeeded = 0; ++ } ++ trav = trav->next; ++ } ++ ++ return 0; ++} ++ ++int + glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, + glusterfs_graph_t **newgraph) + { +@@ -1256,3 +1324,386 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, + + return 0; + } ++int ++glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx, ++ glusterfs_graph_t *parent_graph) ++{ ++ if (parent_graph) { ++ if (parent_graph->first) { ++ xlator_destroy(parent_graph->first); ++ } ++ ctx->active = NULL; ++ GF_FREE(parent_graph); ++ parent_graph = NULL; ++ } ++ return 0; ++} ++ ++void * ++glusterfs_graph_cleanup(void *arg) ++{ ++ glusterfs_graph_t *graph = NULL; ++ glusterfs_ctx_t *ctx = THIS->ctx; ++ int ret = -1; ++ graph = arg; ++ ++ if (!graph) ++ return NULL; ++ ++ /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN ++ * Then wait for GF_EVENT_CHILD_DOWN to get on the top ++ * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed ++ * to fini. ++ * ++ * During fini call, this will take a last unref on rpc and ++ * rpc_transport_object. ++ */ ++ if (graph->first) ++ default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first); ++ ++ ret = pthread_mutex_lock(&graph->mutex); ++ if (ret != 0) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, ++ "Failed to aquire a lock"); ++ goto out; ++ } ++ /* check and wait for CHILD_DOWN for top xlator*/ ++ while (graph->used) { ++ ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex); ++ if (ret != 0) ++ gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED, ++ "cond wait failed "); ++ } ++ ++ ret = pthread_mutex_unlock(&graph->mutex); ++ if (ret != 0) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, ++ "Failed to release a lock"); ++ } ++ ++ /* Though we got a child down on top xlator, we have to wait until ++ * all the notifier to exit. Because there should not be any threads ++ * that access xl variables. 
++ */ ++ pthread_mutex_lock(&ctx->notify_lock); ++ { ++ while (ctx->notifying) ++ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock); ++ } ++ pthread_mutex_unlock(&ctx->notify_lock); ++ ++ glusterfs_graph_fini(graph); ++ glusterfs_graph_destroy(graph); ++out: ++ return NULL; ++} ++ ++glusterfs_graph_t * ++glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name, ++ char *type) ++{ ++ glusterfs_graph_t *parent_graph = NULL; ++ xlator_t *ixl = NULL; ++ int ret = -1; ++ parent_graph = GF_CALLOC(1, sizeof(*parent_graph), ++ gf_common_mt_glusterfs_graph_t); ++ if (!parent_graph) ++ goto out; ++ ++ INIT_LIST_HEAD(&parent_graph->list); ++ ++ ctx->active = parent_graph; ++ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t); ++ if (!ixl) ++ goto out; ++ ++ ixl->ctx = ctx; ++ ixl->graph = parent_graph; ++ ixl->options = dict_new(); ++ if (!ixl->options) ++ goto out; ++ ++ ixl->name = gf_strdup(name); ++ if (!ixl->name) ++ goto out; ++ ++ ixl->is_autoloaded = 1; ++ ++ if (xlator_set_type(ixl, type) == -1) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, ++ "%s (%s) set type failed", name, type); ++ goto out; ++ } ++ ++ glusterfs_graph_set_first(parent_graph, ixl); ++ parent_graph->top = ixl; ++ ixl = NULL; ++ ++ gettimeofday(&parent_graph->dob, NULL); ++ fill_uuid(parent_graph->graph_uuid, 128); ++ parent_graph->id = ctx->graph_id++; ++ ret = 0; ++out: ++ if (ixl) ++ xlator_destroy(ixl); ++ ++ if (ret) { ++ glusterfs_muxsvc_cleanup_parent(ctx, parent_graph); ++ parent_graph = NULL; ++ } ++ return parent_graph; ++} ++ ++int ++glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) ++{ ++ xlator_t *last_xl = NULL; ++ glusterfs_graph_t *graph = NULL; ++ glusterfs_graph_t *parent_graph = NULL; ++ pthread_t clean_graph = { ++ 0, ++ }; ++ int ret = -1; ++ xlator_t *xl = NULL; ++ ++ if (!ctx || !ctx->active || !volfile_obj) ++ goto out; ++ parent_graph = ctx->active; ++ graph = volfile_obj->graph; ++ if (graph && graph->first) ++ xl = graph->first; ++ ++ last_xl = graph->last_xl; ++ if (last_xl) ++ last_xl->next = NULL; ++ if (!xl || xl->cleanup_starting) ++ goto out; ++ ++ xl->cleanup_starting = 1; ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, ++ "detaching child %s", volfile_obj->vol_id); ++ ++ list_del_init(&volfile_obj->volfile_list); ++ glusterfs_mux_xlator_unlink(parent_graph->top, xl); ++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); ++ parent_graph->xl_count -= graph->xl_count; ++ parent_graph->leaf_count -= graph->leaf_count; ++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl); ++ parent_graph->id++; ++ ret = 0; ++out: ++ if (!ret) { ++ list_del_init(&volfile_obj->volfile_list); ++ if (graph) { ++ ret = gf_thread_create_detached( ++ &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean"); ++ if (ret) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_CLEANUP_FAILED, ++ "%s failed to create clean " ++ "up thread", ++ volfile_obj->vol_id); ++ ret = 0; ++ } ++ } ++ GF_FREE(volfile_obj); ++ } ++ return ret; ++} ++ ++int ++glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, ++ char *volfile_id, char *checksum) ++{ ++ glusterfs_graph_t *graph = NULL; ++ glusterfs_graph_t *parent_graph = NULL; ++ glusterfs_graph_t *clean_graph = NULL; ++ int ret = -1; ++ xlator_t *xl = NULL; ++ xlator_t *last_xl = NULL; ++ gf_volfile_t *volfile_obj = NULL; ++ pthread_t thread_id = { ++ 0, ++ }; ++ ++ if (!ctx) ++ goto out; ++ parent_graph = ctx->active; ++ graph = 
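/* Editor's note: glusterfs_graph_cleanup() above is always run on a
 * detached thread via gf_thread_create_detached(), gluster's own wrapper.
 * A rough pthread-only equivalent of that call, for illustration:
 *
 *     pthread_t tid;
 *     pthread_attr_t attr;
 *
 *     pthread_attr_init(&attr);
 *     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
 *     // from here the thread owns `graph` and frees it when done
 *     if (pthread_create(&tid, &attr, glusterfs_graph_cleanup, graph) != 0)
 *         gf_msg("glusterfs", GF_LOG_ERROR, errno,
 *                LG_MSG_GRAPH_CLEANUP_FAILED, "failed to spawn cleanup");
 *     pthread_attr_destroy(&attr);
 */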
glusterfs_graph_construct(fp); ++ if (!graph) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to construct the graph"); ++ goto out; ++ } ++ graph->last_xl = glusterfs_get_last_xlator(graph); ++ ++ for (xl = graph->first; xl; xl = xl->next) { ++ if (strcmp(xl->type, "mount/fuse") == 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_ATTACH_FAILED, ++ "fuse xlator cannot be specified in volume file"); ++ goto out; ++ } ++ } ++ ++ graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph)); ++ xl = graph->first; ++ /* TODO memory leaks everywhere need to free graph in case of error */ ++ if (glusterfs_graph_prepare(graph, ctx, xl->name)) { ++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to prepare graph for xlator %s", xl->name); ++ ret = -1; ++ goto out; ++ } else if (glusterfs_graph_init(graph)) { ++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to initialize graph for xlator %s", xl->name); ++ ret = -1; ++ goto out; ++ } else if (glusterfs_graph_parent_up(graph)) { ++ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, ++ "failed to link the graphs for xlator %s ", xl->name); ++ ret = -1; ++ goto out; ++ } ++ ++ if (!parent_graph) { ++ parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd", ++ "debug/io-stats"); ++ if (!parent_graph) ++ goto out; ++ ((xlator_t *)parent_graph->top)->next = xl; ++ clean_graph = parent_graph; ++ } else { ++ last_xl = parent_graph->last_xl; ++ if (last_xl) ++ last_xl->next = xl; ++ xl->prev = last_xl; ++ } ++ parent_graph->last_xl = graph->last_xl; ++ ++ ret = glusterfs_xlator_link(parent_graph->top, xl); ++ if (ret) { ++ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED, ++ "parent up notification failed"); ++ goto out; ++ } ++ parent_graph->xl_count += graph->xl_count; ++ parent_graph->leaf_count += graph->leaf_count; ++ parent_graph->id++; ++ ++ if (!volfile_obj) { ++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); ++ if (!volfile_obj) { ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ graph->used = 1; ++ parent_graph->id++; ++ list_add(&graph->list, &ctx->graphs); ++ INIT_LIST_HEAD(&volfile_obj->volfile_list); ++ volfile_obj->graph = graph; ++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", ++ volfile_id); ++ memcpy(volfile_obj->volfile_checksum, checksum, ++ sizeof(volfile_obj->volfile_checksum)); ++ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list); ++ ++ gf_log_dump_graph(fp, graph); ++ graph = NULL; ++ ++ ret = 0; ++out: ++ if (ret) { ++ if (graph) { ++ gluster_graph_take_reference(graph->first); ++ ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup, ++ graph, "graph_clean"); ++ if (ret) { ++ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_CLEANUP_FAILED, ++ "%s failed to create clean " ++ "up thread", ++ volfile_id); ++ ret = 0; ++ } ++ } ++ if (clean_graph) ++ glusterfs_muxsvc_cleanup_parent(ctx, clean_graph); ++ } ++ return ret; ++} ++ ++int ++glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, ++ gf_volfile_t *volfile_obj, char *checksum) ++{ ++ glusterfs_graph_t *oldvolfile_graph = NULL; ++ glusterfs_graph_t *newvolfile_graph = NULL; ++ ++ int ret = -1; ++ ++ if (!ctx) { ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL, ++ "ctx is NULL"); ++ goto out; ++ } ++ ++ /* Change the message id */ ++ if (!volfile_obj) { ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, 
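/* Editor's note: a condensed view of the attach path above, with error
 * handling stripped (names as in this patch):
 *
 *     graph = glusterfs_graph_construct(fp);   // parse the volfile
 *     // mount/fuse xlators are rejected: this process hosts services only
 *     glusterfs_graph_prepare(graph, ctx, xl->name);
 *     glusterfs_graph_init(graph);
 *     glusterfs_graph_parent_up(graph);
 *     // splice the new subgraph after the parent graph's current tail
 *     last_xl->next = xl;
 *     xl->prev = last_xl;
 *     parent_graph->last_xl = graph->last_xl;
 *
 * glusterfs_mux_volfile_reconfigure() below then chooses between this
 * detach/attach cycle and a plain option reconfigure, based on
 * is_graph_topology_equal().
 */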
LG_MSG_CTX_NULL, ++ "failed to get volfile object"); ++ goto out; ++ } ++ ++ oldvolfile_graph = volfile_obj->graph; ++ if (!oldvolfile_graph) { ++ goto out; ++ } ++ ++ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp); ++ ++ if (!newvolfile_graph) { ++ goto out; ++ } ++ newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph); ++ ++ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); ++ ++ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { ++ ret = glusterfs_process_svc_detach(ctx, volfile_obj); ++ if (ret) { ++ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, ++ LG_MSG_GRAPH_CLEANUP_FAILED, ++ "Could not detach " ++ "old graph. Aborting the reconfiguration operation"); ++ goto out; ++ } ++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, ++ volfile_obj->vol_id, checksum); ++ goto out; ++ } ++ ++ gf_msg_debug("glusterfsd-mgmt", 0, ++ "Only options have changed in the" ++ " new graph"); ++ ++ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph); ++ if (ret) { ++ gf_msg_debug("glusterfsd-mgmt", 0, ++ "Could not reconfigure " ++ "new options in old graph"); ++ goto out; ++ } ++ memcpy(volfile_obj->volfile_checksum, checksum, ++ sizeof(volfile_obj->volfile_checksum)); ++ ++ ret = 0; ++out: ++ ++ if (newvolfile_graph) ++ glusterfs_graph_destroy(newvolfile_graph); ++ ++ return ret; ++} +diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y +index 5b92985..c60ff38 100644 +--- a/libglusterfs/src/graph.y ++++ b/libglusterfs/src/graph.y +@@ -542,6 +542,9 @@ glusterfs_graph_new () + + INIT_LIST_HEAD (&graph->list); + ++ pthread_mutex_init(&graph->mutex, NULL); ++ pthread_cond_init(&graph->child_down_cond, NULL); ++ + gettimeofday (&graph->dob, NULL); + + return graph; +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index e33d5cf..fa2025e 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1154,3 +1154,8 @@ gf_changelog_register_generic + gf_gfid_generate_from_xxh64 + find_xlator_option_in_cmd_args_t + gf_d_type_from_ia_type ++glusterfs_graph_fini ++glusterfs_process_svc_attach_volfp ++glusterfs_mux_volfile_reconfigure ++glusterfs_process_svc_detach ++mgmt_is_multiplexed_daemon +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index d18b50f..0cf80c0 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -810,7 +810,8 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + if (!ctx) + goto out; + +- if (ctx && ctx->active) { ++ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) && ++ (ctx && ctx->active)) { + top = ctx->active->first; + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + brick_count++; +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index dc1e887..5d6f8d2 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1463,3 +1463,19 @@ gluster_graph_take_reference(xlator_t *tree) + } + return; + } ++ ++gf_boolean_t ++mgmt_is_multiplexed_daemon(char *name) ++{ ++ const char *mux_daemons[] = {"glustershd", NULL}; ++ int i; ++ ++ if (!name) ++ return _gf_false; ++ ++ for (i = 0; mux_daemons[i]; i++) { ++ if (!strcmp(name, mux_daemons[i])) ++ return _gf_true; ++ } ++ return _gf_false; ++} +diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h +index 779878f..7275d75 100644 +--- a/rpc/rpc-lib/src/protocol-common.h ++++ b/rpc/rpc-lib/src/protocol-common.h +@@ -245,6 
+245,8 @@ enum glusterd_brick_procnum { + GLUSTERD_NODE_BITROT, + GLUSTERD_BRICK_ATTACH, + GLUSTERD_DUMP_METRICS, ++ GLUSTERD_SVC_ATTACH, ++ GLUSTERD_SVC_DETACH, + GLUSTERD_BRICK_MAXVALUE, + }; + +diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t +index ca112ad..7dae3c3 100644 +--- a/tests/basic/glusterd/heald.t ++++ b/tests/basic/glusterd/heald.t +@@ -7,11 +7,16 @@ + # Covers enable/disable at the moment. Will be enhanced later to include + # the other commands as well. + ++function is_pid_running { ++ local pid=$1 ++ num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l` ++ echo $num ++} ++ + cleanup; + TEST glusterd + TEST pidof glusterd + +-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" + #Commands should fail when volume doesn't exist + TEST ! $CLI volume heal non-existent-volume enable + TEST ! $CLI volume heal non-existent-volume disable +@@ -20,51 +25,55 @@ TEST ! $CLI volume heal non-existent-volume disable + # volumes + TEST $CLI volume create dist $H0:$B0/dist + TEST $CLI volume start dist +-TEST "[ -z $(get_shd_process_pid)]" ++TEST "[ -z $(get_shd_process_pid dist)]" + TEST ! $CLI volume heal dist enable + TEST ! $CLI volume heal dist disable + + # Commands should work on replicate/disperse volume. + TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1 +-TEST "[ -z $(get_shd_process_pid)]" ++TEST "[ -z $(get_shd_process_pid r2)]" + TEST $CLI volume start r2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2 + TEST $CLI volume heal r2 enable + EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol" ++EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2 ++pid=$( get_shd_process_pid r2 ) + TEST $CLI volume heal r2 disable + EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "1" is_pid_running $pid + + # Commands should work on disperse volume. 
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 + TEST $CLI volume start ec2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2 + TEST $CLI volume heal ec2 enable + EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol" ++EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2 ++pid=$(get_shd_process_pid ec2) + TEST $CLI volume heal ec2 disable + EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ++EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "1" is_pid_running $pid + + #Check that shd graph is rewritten correctly on volume stop/start +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse ++ ++EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate + TEST $CLI volume stop r2 +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse + TEST $CLI volume stop ec2 + # When both the volumes are stopped glustershd volfile is not modified just the + # process is stopped +-TEST "[ -z $(get_shd_process_pid) ]" ++TEST "[ -z $(get_shd_process_pid dist) ]" ++TEST "[ -z $(get_shd_process_pid ec2) ]" + + TEST $CLI volume start r2 +-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate + + TEST $CLI volume set r2 self-heal-daemon on + TEST $CLI volume set r2 cluster.self-heal-daemon off +diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +index cdb1a33..e6e65c4 100644 +--- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t ++++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +@@ -55,9 +55,9 @@ TEST kill_glusterd 1 + #Bring back 1st glusterd + TEST $glusterd_1 + +-# We need to wait till PROCESS_UP_TIMEOUT and then check shd service does not +-# come up on node 2 +-sleep $PROCESS_UP_TIMEOUT +-EXPECT "N" shd_up_status_2 ++# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started ++#on node 2, because once glusterd regains quorum, it will restart all volume ++#level daemons ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2 + + cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index 87ca958..289b197 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -237,11 +237,13 @@ function ec_child_up_count_shd { + } + + function get_shd_process_pid { +- ps auxww | grep glusterfs | grep -E 
"glustershd/glustershd.pid" | awk '{print $2}' | head -1 ++ local vol=$1 ++ ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1 + } + + function generate_shd_statedump { +- generate_statedump $(get_shd_process_pid) ++ local vol=$1 ++ generate_statedump $(get_shd_process_pid $vol) + } + + function generate_nfs_statedump { +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index 5fe5156..11ae189 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ b/xlators/mgmt/glusterd/src/Makefile.am +@@ -18,11 +18,12 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \ + glusterd-snapshot-utils.c glusterd-conn-mgmt.c \ +- glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \ ++ glusterd-proc-mgmt.c glusterd-svc-mgmt.c \ + glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \ + glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ + glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ + glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \ ++ glusterd-shd-svc.c glusterd-shd-svc-helper.c \ + glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c + + +@@ -38,11 +39,12 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ + glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \ + glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \ + glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \ +- glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \ ++ glusterd-svc-mgmt.h glusterd-nfs-svc.h \ + glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \ + glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \ + glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ + glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \ ++ glusterd-shd-svc.h glusterd-shd-svc-helper.h \ + glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \ + $(CONTRIBDIR)/userspace-rcu/rculist-extra.h + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index ad9a572..042a805 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2863,7 +2863,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr) + } + + if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +index e80e152..052438c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +@@ -132,3 +132,45 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + glusterd_set_socket_filepath(sockfilepath, socketpath, len); + return 0; + } ++ ++int ++__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, ++ rpc_clnt_event_t event, void *data) ++{ ++ glusterd_conf_t *conf = THIS->private; ++ glusterd_svc_proc_t *mux_proc = mydata; ++ int ret = -1; ++ ++ /* Silently ignoring this error, exactly like the current ++ * implementation */ ++ if (!mux_proc) ++ return 0; ++ ++ if (event == 
RPC_CLNT_DESTROY) { ++ /*RPC_CLNT_DESTROY will only called after mux_proc detached from the ++ * list. So it is safe to call without lock. Processing ++ * RPC_CLNT_DESTROY under a lock will lead to deadlock. ++ */ ++ if (mux_proc->data) { ++ glusterd_volinfo_unref(mux_proc->data); ++ mux_proc->data = NULL; ++ } ++ GF_FREE(mux_proc); ++ ret = 0; ++ } else { ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ ret = mux_proc->notify(mux_proc, event); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ } ++ return ret; ++} ++ ++int ++glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, ++ rpc_clnt_event_t event, void *data) ++{ ++ return glusterd_big_locked_notify(rpc, mydata, event, data, ++ __glusterd_muxsvc_conn_common_notify); ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h +index 602c0ba..d1c4607 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h +@@ -43,9 +43,11 @@ glusterd_conn_disconnect(glusterd_conn_t *conn); + int + glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); ++int ++glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, ++ rpc_clnt_event_t event, void *data); + + int32_t + glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + int len); +- + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c +index f9c8617..b01fd4d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c +@@ -370,6 +370,7 @@ int + glusterd_gfproxydsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; + int ret = -1; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -380,7 +381,7 @@ glusterd_gfproxydsvc_restart() + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + +- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume gfproxyd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 528993c..1cb9013 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -5928,6 +5928,11 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) + + GF_FREE(rebal_data); + ++ fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count, ++ volinfo->shd.svc.online ? "Online" : "Offline"); ++ fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count, ++ volinfo->shd.svc.inited ? "True" : "False"); ++ + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + ret = glusterd_volume_get_hot_tier_type_str(volinfo, + &hot_tier_type_str); +@@ -5997,12 +6002,6 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) + + fprintf(fp, "\n[Services]\n"); + +- if (priv->shd_svc.inited) { +- fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name); +- fprintf(fp, "svc%d.online_status: %s\n\n", count, +- priv->shd_svc.online ? 
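/* Editor's note: the notify handler above deliberately services
 * RPC_CLNT_DESTROY without taking conf->attach_lock: by the time DESTROY
 * fires, the proc has been unlinked from the shared list, and taking the
 * lock could deadlock against the thread driving the teardown. The shape
 * of the pattern, with hypothetical helpers free_proc()/dispatch():
 *
 *     if (event == RPC_CLNT_DESTROY) {
 *         free_proc(mux_proc);              // no lock: already unlinked
 *     } else {
 *         pthread_mutex_lock(&conf->attach_lock);
 *         ret = dispatch(mux_proc, event);  // list may be walked here
 *         pthread_mutex_unlock(&conf->attach_lock);
 *     }
 */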
"Online" : "Offline"); +- } +- + if (priv->nfs_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index 5599a63..1ba58c3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -30,6 +30,7 @@ + #include "rpcsvc.h" + #include "rpc-common-xdr.h" + #include "glusterd-gfproxyd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + + extern struct rpc_clnt_program gd_peer_prog; + extern struct rpc_clnt_program gd_mgmt_prog; +@@ -328,6 +329,26 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, + goto out; + } + ++ volid_ptr = strstr(volume_id, "shd/"); ++ if (volid_ptr) { ++ volid_ptr = strchr(volid_ptr, '/'); ++ if (!volid_ptr) { ++ ret = -1; ++ goto out; ++ } ++ volid_ptr++; ++ ++ ret = glusterd_volinfo_find(volid_ptr, &volinfo); ++ if (ret == -1) { ++ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); ++ goto out; ++ } ++ ++ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len); ++ ret = 0; ++ goto out; ++ } ++ + volid_ptr = strstr(volume_id, "/snaps/"); + if (volid_ptr) { + ret = get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h +index 7a784db..17052ce 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h ++++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h +@@ -51,6 +51,7 @@ typedef enum gf_gld_mem_types_ { + gf_gld_mt_missed_snapinfo_t, + gf_gld_mt_snap_create_args_t, + gf_gld_mt_glusterd_brick_proc_t, ++ gf_gld_mt_glusterd_svc_proc_t, + gf_gld_mt_end, + } gf_gld_mem_types_t; + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h +index c7b3ca8..424e15f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-messages.h ++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h +@@ -298,6 +298,8 @@ GLFS_MSGID( + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, + GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED, +- GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); ++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, ++ GD_MSG_SHD_START_FAIL, GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, ++ GD_MSG_ATTACH_INFO, GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index df8a6ab..95f9707 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -44,6 +44,7 @@ + #include "glusterd-snapshot-utils.h" + #include "glusterd-svc-mgmt.h" + #include "glusterd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-shd-svc.h" + #include "glusterd-nfs-svc.h" + #include "glusterd-quotad-svc.h" +@@ -2223,6 +2224,11 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key, + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -2237,7 +2243,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char 
*key, + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) + goto out; + } +@@ -2693,6 +2699,11 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -2706,7 +2717,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + } + } + if (svcs_reconfigure) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart " +@@ -3091,6 +3102,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -3106,7 +3122,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); +@@ -3139,6 +3155,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + if (ret) + goto out; + ++ svc = &(volinfo->shd.svc); ++ ret = svc->reconfigure(volinfo); ++ if (ret) ++ goto out; ++ + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -3154,7 +3175,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); +@@ -3361,7 +3382,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) + goto out; + } +@@ -3644,14 +3665,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + other_count++; + node_count++; + +- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { +- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0, +- vol_opts); +- if (ret) +- goto out; +- other_count++; +- node_count++; +- + } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0, + vol_opts); +@@ -3685,6 +3698,12 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + goto out; + other_count++; + node_count++; ++ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { ++ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index); ++ if (ret) ++ goto out; ++ other_count++; ++ node_count++; + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); + if (ret) +@@ -3747,6 +3766,19 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + node_count++; + } + ++ if (glusterd_is_shd_compatible_volume(volinfo)) { ++ shd_enabled = gd_is_self_heal_enabled(volinfo, 
vol_opts); ++ if (shd_enabled) { ++ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, ++ other_index); ++ if (ret) ++ goto out; ++ other_count++; ++ other_index++; ++ node_count++; ++ } ++ } ++ + nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY, + _gf_false); + if (!nfs_disabled) { +@@ -3759,18 +3791,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + node_count++; + } + +- if (glusterd_is_shd_compatible_volume(volinfo)) +- shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts); +- if (shd_enabled) { +- ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, +- other_index, vol_opts); +- if (ret) +- goto out; +- other_count++; +- node_count++; +- other_index++; +- } +- + if (glusterd_is_volume_quota_enabled(volinfo)) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, + other_index, vol_opts); +@@ -6875,16 +6895,18 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; ++ glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); ++ svc = &(volinfo->shd.svc); + + switch (heal_op) { + case GF_SHD_OP_INDEX_SUMMARY: + case GF_SHD_OP_STATISTICS_HEAL_COUNT: +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " +@@ -6905,7 +6927,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + break; + + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " +@@ -7040,7 +7062,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr, + ret = -1; + goto out; + } else { +- pending_node->node = &(priv->shd_svc); ++ pending_node->node = &(volinfo->shd.svc); + pending_node->type = GD_NODE_SHD; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; +@@ -7174,6 +7196,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + glusterd_pending_node_t *pending_node = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; ++ glusterd_svc_t *svc = NULL; + + GF_ASSERT(dict); + +@@ -7269,7 +7292,8 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + + ret = 0; + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { +- if (!priv->shd_svc.online) { ++ svc = &(volinfo->shd.svc); ++ if (!svc->online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED, + "Self-heal daemon is not running"); +@@ -7281,7 +7305,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + ret = -1; + goto out; + } +- pending_node->node = &(priv->shd_svc); ++ pending_node->node = svc; + pending_node->type = GD_NODE_SHD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +new file mode 100644 +index 0000000..9196758 +--- /dev/null ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +@@ -0,0 +1,140 @@ ++/* ++ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. 
++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include "glusterd.h" ++#include "glusterd-utils.h" ++#include "glusterd-shd-svc-helper.h" ++#include "glusterd-messages.h" ++#include "glusterd-volgen.h" ++ ++void ++glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, ++ int path_len) ++{ ++ char sockfilepath[PATH_MAX] = { ++ 0, ++ }; ++ char rundir[PATH_MAX] = { ++ 0, ++ }; ++ int32_t len = 0; ++ glusterd_conf_t *priv = THIS->private; ++ ++ if (!priv) ++ return; ++ ++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); ++ len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, ++ uuid_utoa(MY_UUID)); ++ if ((len < 0) || (len >= sizeof(sockfilepath))) { ++ sockfilepath[0] = 0; ++ } ++ ++ glusterd_set_socket_filepath(sockfilepath, path, path_len); ++} ++ ++void ++glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, ++ int path_len) ++{ ++ char rundir[PATH_MAX] = { ++ 0, ++ }; ++ glusterd_conf_t *priv = THIS->private; ++ ++ if (!priv) ++ return; ++ ++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); ++ ++ snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname); ++} ++ ++void ++glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, ++ int path_len) ++{ ++ char workdir[PATH_MAX] = { ++ 0, ++ }; ++ glusterd_conf_t *priv = THIS->private; ++ ++ if (!priv) ++ return; ++ ++ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); ++ ++ snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname); ++} ++ ++void ++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len) ++{ ++ snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname); ++} ++ ++void ++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len) ++{ ++ snprintf(logfile, len, "%s/shd.log", logdir); ++} ++ ++void ++glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) ++{ ++ glusterd_svc_proc_t *svc_proc = NULL; ++ glusterd_svc_t *svc = NULL; ++ glusterd_conf_t *conf = NULL; ++ gf_boolean_t need_unref = _gf_false; ++ rpc_clnt_t *rpc = NULL; ++ ++ conf = THIS->private; ++ if (!conf) ++ return; ++ ++ GF_VALIDATE_OR_GOTO(THIS->name, conf, out); ++ GF_VALIDATE_OR_GOTO(THIS->name, shd, out); ++ ++ svc = &shd->svc; ++ shd->attached = _gf_false; ++ ++ if (svc->conn.rpc) { ++ rpc_clnt_unref(svc->conn.rpc); ++ svc->conn.rpc = NULL; ++ } ++ ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ svc_proc = svc->svc_proc; ++ svc->svc_proc = NULL; ++ svc->inited = _gf_false; ++ cds_list_del_init(&svc->mux_svc); ++ glusterd_unlink_file(svc->proc.pidfile); ++ ++ if (svc_proc && cds_list_empty(&svc_proc->svcs)) { ++ cds_list_del_init(&svc_proc->svc_proc_list); ++ /* We cannot free svc_proc list from here. Because ++ * if there are pending events on the rpc, it will ++ * try to access the corresponding svc_proc, so unrefing ++ * rpc request and then cleaning up the memory is carried ++ * from the notify function upon RPC_CLNT_DESTROY destroy. 
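++             *
++             * (Editor's note, not part of the upstream comment: the path
++             * builders earlier in this file share one truncation idiom,
++             * sketched here with a hypothetical buffer:
++             *     len = snprintf(buf, sizeof(buf), "%s/run-%s", rundir, id);
++             *     if ((len < 0) || (len >= sizeof(buf)))
++             *         buf[0] = 0;   // degrade to "" rather than a clipped path
++             * so callers never act on a silently truncated socket path.)
++             */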
++ */ ++ need_unref = _gf_true; ++ rpc = svc_proc->rpc; ++ svc_proc->rpc = NULL; ++ } ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ /*rpc unref has to be performed outside the lock*/ ++ if (need_unref && rpc) ++ rpc_clnt_unref(rpc); ++out: ++ return; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +new file mode 100644 +index 0000000..c70702c +--- /dev/null ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +@@ -0,0 +1,45 @@ ++/* ++ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef _GLUSTERD_SHD_SVC_HELPER_H_ ++#define _GLUSTERD_SHD_SVC_HELPER_H_ ++ ++#include "glusterd.h" ++#include "glusterd-svc-mgmt.h" ++ ++void ++glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, ++ int path_len); ++ ++void ++glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, ++ int path_len); ++ ++void ++glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, ++ int path_len); ++ ++void ++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len); ++ ++void ++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len); ++ ++void ++glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd); ++ ++int ++glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, ++ glusterd_svc_t *svc, int flags); ++ ++int ++glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo); ++ ++#endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 69e27cb..937ea30 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -13,9 +13,10 @@ + #include "glusterd.h" + #include "glusterd-utils.h" + #include "glusterd-volgen.h" +-#include "glusterd-svc-mgmt.h" + #include "glusterd-shd-svc.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-svc-helper.h" ++#include "glusterd-store.h" + + #define GD_SHD_PROCESS_NAME "--process-name" + char *shd_svc_name = "glustershd"; +@@ -23,27 +24,145 @@ char *shd_svc_name = "glustershd"; + void + glusterd_shdsvc_build(glusterd_svc_t *svc) + { ++ int ret = -1; ++ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); ++ if (ret < 0) ++ return; ++ ++ CDS_INIT_LIST_HEAD(&svc->mux_svc); + svc->manager = glusterd_shdsvc_manager; + svc->start = glusterd_shdsvc_start; +- svc->stop = glusterd_svc_stop; ++ svc->stop = glusterd_shdsvc_stop; ++ svc->reconfigure = glusterd_shdsvc_reconfigure; + } + + int +-glusterd_shdsvc_init(glusterd_svc_t *svc) ++glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, ++ glusterd_svc_proc_t *mux_svc) + { +- return glusterd_svc_init(svc, shd_svc_name); ++ int ret = -1; ++ char rundir[PATH_MAX] = { ++ 0, ++ }; ++ char sockpath[PATH_MAX] = { ++ 0, ++ }; ++ char pidfile[PATH_MAX] = { ++ 0, ++ }; ++ char volfile[PATH_MAX] = { ++ 0, ++ }; ++ char logdir[PATH_MAX] = { ++ 0, ++ }; ++ char logfile[PATH_MAX] = { ++ 0, ++ }; ++ char volfileid[256] = {0}; ++ glusterd_svc_t *svc = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *priv = NULL; ++ glusterd_muxsvc_conn_notify_t notify = NULL; ++ xlator_t *this = NULL; ++ char *volfileserver = 
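/* Editor's note: glusterd_shd_svcproc_cleanup() above "steals" the rpc
 * pointer under attach_lock and drops the reference only after unlocking,
 * because rpc_clnt_unref() may fire RPC_CLNT_DESTROY notifications that
 * themselves contend for the lock. The idiom in isolation:
 *
 *     pthread_mutex_lock(&conf->attach_lock);
 *     rpc = svc_proc->rpc;      // detach under the lock
 *     svc_proc->rpc = NULL;
 *     pthread_mutex_unlock(&conf->attach_lock);
 *     if (rpc)
 *         rpc_clnt_unref(rpc);  // callbacks run lock-free
 */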
NULL; ++ int32_t len = 0; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO(THIS->name, this, out); ++ ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ volinfo = data; ++ GF_VALIDATE_OR_GOTO(this->name, data, out); ++ GF_VALIDATE_OR_GOTO(this->name, mux_svc, out); ++ ++ svc = &(volinfo->shd.svc); ++ ++ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); ++ if (ret < 0) ++ goto out; ++ ++ notify = glusterd_muxsvc_common_rpc_notify; ++ glusterd_store_perform_node_state_store(volinfo); ++ ++ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); ++ glusterd_svc_create_rundir(rundir); ++ ++ glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir)); ++ glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile)); ++ ++ /* Initialize the connection mgmt */ ++ if (mux_conn && mux_svc->rpc) { ++ /* multiplexed svc */ ++ svc->conn.frame_timeout = mux_conn->frame_timeout; ++ /* This will be unrefed from glusterd_shd_svcproc_cleanup*/ ++ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc); ++ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s", ++ mux_conn->sockpath); ++ } else { ++ ret = mkdir_p(logdir, 0755, _gf_true); ++ if ((ret == -1) && (EEXIST != errno)) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, ++ "Unable to create logdir %s", logdir); ++ goto out; ++ } ++ ++ glusterd_svc_build_shd_socket_filepath(volinfo, sockpath, ++ sizeof(sockpath)); ++ ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600, ++ notify); ++ if (ret) ++ goto out; ++ /* This will be unrefed when the last svcs is detached from the list */ ++ if (!mux_svc->rpc) ++ mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc); ++ } ++ ++ /* Initialize the process mgmt */ ++ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile)); ++ glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX); ++ len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname); ++ if ((len < 0) || (len >= sizeof(volfileid))) { ++ ret = -1; ++ goto out; ++ } ++ ++ if (dict_get_strn(this->options, "transport.socket.bind-address", ++ SLEN("transport.socket.bind-address"), ++ &volfileserver) != 0) { ++ volfileserver = "localhost"; ++ } ++ ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir, ++ logfile, volfile, volfileid, volfileserver); ++ if (ret) ++ goto out; ++ ++out: ++ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); ++ return ret; + } + +-static int +-glusterd_shdsvc_create_volfile() ++int ++glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) + { + char filepath[PATH_MAX] = { + 0, + }; ++ + int ret = -1; +- glusterd_conf_t *conf = THIS->private; + dict_t *mod_dict = NULL; + ++ glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX); ++ if (!glusterd_is_shd_compatible_volume(volinfo)) { ++ /* If volfile exist, delete it. This case happens when we ++ * change from replica/ec to distribute. 
++ */ ++ (void)glusterd_unlink_file(filepath); ++ ret = 0; ++ goto out; ++ } + mod_dict = dict_new(); + if (!mod_dict) + goto out; +@@ -64,9 +183,7 @@ glusterd_shdsvc_create_volfile() + if (ret) + goto out; + +- glusterd_svc_build_volfile_path(shd_svc_name, conf->workdir, filepath, +- sizeof(filepath)); +- ret = glusterd_create_global_volfile(build_shd_graph, filepath, mod_dict); ++ ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); +@@ -81,26 +198,89 @@ out: + return ret; + } + ++gf_boolean_t ++glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc) ++{ ++ glusterd_svc_proc_t *svc_proc = NULL; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_svc_t *temp_svc = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ gf_boolean_t comp = _gf_false; ++ glusterd_conf_t *conf = THIS->private; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ svc_proc = svc->svc_proc; ++ if (!svc_proc) ++ goto unlock; ++ cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc) ++ { ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to get shd object " ++ "from shd service"); ++ goto unlock; ++ } ++ ++ /* Get volinfo from shd */ ++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); ++ if (!volinfo) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to get volinfo from " ++ "from shd"); ++ goto unlock; ++ } ++ if (!glusterd_is_shd_compatible_volume(volinfo)) ++ continue; ++ if (volinfo->status == GLUSTERD_STATUS_STARTED) ++ goto unlock; ++ } ++ comp = _gf_true; ++ } ++unlock: ++ pthread_mutex_unlock(&conf->attach_lock); ++out: ++ return comp; ++} ++ + int + glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + { +- int ret = 0; ++ int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + +- if (!svc->inited) { +- ret = glusterd_shdsvc_init(svc); +- if (ret) { +- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, +- "Failed to init shd " +- "service"); +- goto out; +- } else { +- svc->inited = _gf_true; +- gf_msg_debug(THIS->name, 0, "shd service initialized"); ++ volinfo = data; ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); ++ ++ if (volinfo) ++ glusterd_volinfo_ref(volinfo); ++ ++ ret = glusterd_shdsvc_create_volfile(volinfo); ++ if (ret) ++ goto out; ++ ++ if (!glusterd_is_shd_compatible_volume(volinfo)) { ++ ret = 0; ++ if (svc->inited) { ++ /* This means glusterd was running for this volume and now ++ * it was converted to a non-shd volume. 
So just stop the shd ++ */ ++ ret = svc->stop(svc, SIGTERM); + } ++ goto out; + } + +- volinfo = data; ++ ret = glusterd_shd_svc_mux_init(volinfo, svc); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, ++ "Failed to init shd service"); ++ goto out; ++ } + + /* If all the volumes are stopped or all shd compatible volumes + * are stopped then stop the service if: +@@ -110,31 +290,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + */ +- if (glusterd_are_all_volumes_stopped() || +- glusterd_all_shd_compatible_volumes_stopped()) { +- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) { +- ret = svc->stop(svc, SIGTERM); +- } +- } else { +- if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) { +- ret = glusterd_shdsvc_create_volfile(); +- if (ret) +- goto out; +- +- ret = svc->stop(svc, SIGTERM); +- if (ret) +- goto out; ++ if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) { ++ /* TODO ++ * Take a lock and detach all svc's to stop the process ++ * also reset the init flag ++ */ ++ ret = svc->stop(svc, SIGTERM); ++ } else if (volinfo) { ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) ++ goto out; + ++ if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = svc->start(svc, flags); + if (ret) + goto out; +- +- ret = glusterd_conn_connect(&(svc->conn)); +- if (ret) +- goto out; + } + } + out: ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); +@@ -143,7 +318,7 @@ out: + } + + int +-glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) ++glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + { + int ret = -1; + char glusterd_uuid_option[PATH_MAX] = {0}; +@@ -178,31 +353,136 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + goto out; + + ret = glusterd_svc_start(svc, flags, cmdline); ++ if (ret) ++ goto out; + ++ ret = glusterd_conn_connect(&(svc->conn)); + out: + if (cmdline) + dict_unref(cmdline); ++ return ret; ++} + ++int ++glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, ++ glusterd_svc_t *svc, int flags) ++{ ++ int ret = -1; ++ glusterd_svc_proc_t *mux_proc = NULL; ++ glusterd_conf_t *conf = NULL; ++ ++ conf = THIS->private; ++ ++ if (!conf || !volinfo || !svc) ++ return -1; ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); ++ mux_proc = glusterd_svcprocess_new(); ++ if (!mux_proc) { ++ return -1; ++ } ++ ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc); ++ if (ret) ++ return -1; ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); ++ svc->svc_proc = mux_proc; ++ cds_list_del_init(&svc->mux_svc); ++ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ ++ ret = glusterd_new_shd_svc_start(svc, flags); ++ if (!ret) { ++ volinfo->shd.attached = _gf_true; ++ } ++ return ret; ++} ++ ++int ++glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) ++{ ++ int ret = -1; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *conf = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to 
get shd object "
++               "from shd service");
++        return -1;
++    }
++
++    /* Get volinfo from shd */
++    volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
++    if (!volinfo) {
++        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++               "Failed to get volinfo "
++               "from shd");
++        return -1;
++    }
++
++    if (volinfo->status != GLUSTERD_STATUS_STARTED)
++        return -1;
++
++    glusterd_volinfo_ref(volinfo);
++    if (!svc->inited) {
++        ret = glusterd_shd_svc_mux_init(volinfo, svc);
++        if (ret)
++            goto out;
++    }
++
++    if (shd->attached) {
++        ret = glusterd_attach_svc(svc, volinfo, flags);
++        if (ret) {
++            gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
++                   "Failed to attach shd svc(volume=%s) to pid=%d. Starting "
++                   "a new process",
++                   volinfo->volname, glusterd_proc_get_pid(&svc->proc));
++            ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags);
++        }
++        goto out;
++    }
++    ret = glusterd_new_shd_svc_start(svc, flags);
++    if (!ret) {
++        shd->attached = _gf_true;
++    }
++out:
++    if (volinfo)
++        glusterd_volinfo_unref(volinfo);
+     gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ 
+     return ret;
+ }
+ 
+ int
+-glusterd_shdsvc_reconfigure()
++glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+ {
+     int ret = -1;
+     xlator_t *this = NULL;
+-    glusterd_conf_t *priv = NULL;
+     gf_boolean_t identical = _gf_false;
++    dict_t *mod_dict = NULL;
++    glusterd_svc_t *svc = NULL;
+ 
+     this = THIS;
+     GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ 
+-    priv = this->private;
+-    GF_VALIDATE_OR_GOTO(this->name, priv, out);
++    if (!volinfo) {
++        /* reconfigure will be called separately */
++        ret = 0;
++        goto out;
++    }
+ 
+-    if (glusterd_all_shd_compatible_volumes_stopped())
++    glusterd_volinfo_ref(volinfo);
++    svc = &(volinfo->shd.svc);
++    if (glusterd_svcs_shd_compatible_volumes_stopped(svc))
+         goto manager;
+ 
+     /*
+@@ -210,8 +490,42 @@
+      * and cksum i.e. "character-by-character". If YES, then
+      * NOTHING has been changed, just return.
+      */
+-    ret = glusterd_svc_check_volfile_identical(priv->shd_svc.name,
+-                                               build_shd_graph, &identical);
++
++    if (!glusterd_is_shd_compatible_volume(volinfo)) {
++        if (svc->inited)
++            goto manager;
++
++        /* Nothing to do if not shd compatible */
++        ret = 0;
++        goto out;
++    }
++    mod_dict = dict_new();
++    if (!mod_dict)
++        goto out;
++
++    ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
++    if (ret)
++        goto out;
++
++    ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
++    if (ret)
++        goto out;
++
++    ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
++    if (ret)
++        goto out;
++
++    ret = dict_set_int32(mod_dict, "graph-check", 1);
++    if (ret)
++        goto out;
++
++    ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
++    if (ret)
++        goto out;
++
++    ret = glusterd_volume_svc_check_volfile_identical(
++        "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
++        &identical);
+     if (ret)
+         goto out;
+ 
+@@ -226,8 +540,9 @@
+      * changed, then inform the xlator to reconfigure the options.
+      */
+     identical = _gf_false; /* RESET the FLAG */
+-    ret = glusterd_svc_check_topology_identical(priv->shd_svc.name,
+-                                                build_shd_graph, &identical);
++    ret = glusterd_volume_svc_check_topology_identical(
++        "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
++        &identical);
+     if (ret)
+         goto out;
+ 
+@@ -235,7 +550,7 @@
+      * options to shd volfile, so that shd will be reconfigured.
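 *
 * (Editor's note: condensing the start/attach policy implemented above:
 *     if (shd->attached) {
 *         // a glustershd process exists: ask it to adopt this volume
 *         if (glusterd_attach_svc(svc, volinfo, flags))
 *             glusterd_recover_shd_attach_failure(volinfo, svc, flags);
 *     } else {
 *         // first shd-compatible volume: spawn a fresh daemon
 *         glusterd_new_shd_svc_start(svc, flags);
 *     }
 * so one process ends up hosting every volume's self-heal graph.)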
+ */ + if (identical) { +- ret = glusterd_shdsvc_create_volfile(); ++ ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } +@@ -243,12 +558,129 @@ glusterd_shdsvc_reconfigure() + } + manager: + /* +- * shd volfile's topology has been changed. shd server needs +- * to be RESTARTED to ACT on the changed volfile. ++ * shd volfile's topology has been changed. volfile needs ++ * to be RECONFIGURED to ACT on the changed volfile. + */ +- ret = priv->shd_svc.manager(&(priv->shd_svc), NULL, PROC_START_NO_WAIT); ++ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); + + out: ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); ++ if (mod_dict) ++ dict_unref(mod_dict); + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; + } ++ ++int ++glusterd_shdsvc_restart() ++{ ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; ++ int ret = -1; ++ xlator_t *this = THIS; ++ glusterd_conf_t *conf = NULL; ++ glusterd_svc_t *svc = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ pthread_mutex_lock(&conf->volume_lock); ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) ++ { ++ glusterd_volinfo_ref(volinfo); ++ pthread_mutex_unlock(&conf->volume_lock); ++ /* Start per volume shd svc */ ++ if (volinfo->status == GLUSTERD_STATUS_STARTED) { ++ svc = &(volinfo->shd.svc); ++ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL, ++ "Couldn't start shd for " ++ "vol: %s on restart", ++ volinfo->volname); ++ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", ++ volinfo->volname, svc->name); ++ glusterd_volinfo_unref(volinfo); ++ goto out; ++ } ++ } ++ glusterd_volinfo_unref(volinfo); ++ pthread_mutex_lock(&conf->volume_lock); ++ } ++ pthread_mutex_unlock(&conf->volume_lock); ++out: ++ return ret; ++} ++ ++int ++glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) ++{ ++ int ret = -1; ++ glusterd_svc_proc_t *svc_proc = NULL; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ gf_boolean_t empty = _gf_false; ++ glusterd_conf_t *conf = NULL; ++ int pid = -1; ++ ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ svc_proc = svc->svc_proc; ++ GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to get shd object " ++ "from shd service"); ++ return -1; ++ } ++ ++ /* Get volinfo from shd */ ++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); ++ if (!volinfo) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to get volinfo from " ++ "from shd"); ++ return -1; ++ } ++ ++ glusterd_volinfo_ref(volinfo); ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ gf_is_service_running(svc->proc.pidfile, &pid); ++ cds_list_del_init(&svc->mux_svc); ++ empty = cds_list_empty(&svc_proc->svcs); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ if (empty) { ++ /* Unref will happen when destroying the connection */ ++ glusterd_volinfo_ref(volinfo); ++ svc_proc->data = volinfo; ++ ret = glusterd_svc_stop(svc, sig); ++ } ++ if (!empty && pid != -1) { ++ ret = glusterd_detach_svc(svc, volinfo, sig); ++ if (ret) ++ 
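/* Editor's note: the stop path above distinguishes the last user of the
 * shared glustershd process from an intermediate one:
 *
 *     if (empty)                 // no other svcs left on the proc
 *         ret = glusterd_svc_stop(svc, sig);            // kill the daemon
 *     else if (pid != -1)        // daemon stays up for other volumes
 *         ret = glusterd_detach_svc(svc, volinfo, sig); // drop this graph
 *
 * either way the per-volume pidfile is unlinked and the svc is marked
 * offline before the proc bookkeeping is cleaned up. */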
gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, ++ "shd service is failed to detach volume %s from pid %d", ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ else ++ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS, ++ "Shd service is detached for volume %s from pid %d", ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ } ++ svc->online = _gf_false; ++ (void)glusterd_unlink_file((char *)svc->proc.pidfile); ++ glusterd_shd_svcproc_cleanup(shd); ++ ret = 0; ++ glusterd_volinfo_unref(volinfo); ++out: ++ gf_msg_debug(THIS->name, 0, "Returning %d", ret); ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h +index 775a9d4..55b409f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h +@@ -12,12 +12,20 @@ + #define _GLUSTERD_SHD_SVC_H_ + + #include "glusterd-svc-mgmt.h" ++#include "glusterd.h" ++ ++typedef struct glusterd_shdsvc_ glusterd_shdsvc_t; ++struct glusterd_shdsvc_ { ++ glusterd_svc_t svc; ++ gf_boolean_t attached; ++}; + + void + glusterd_shdsvc_build(glusterd_svc_t *svc); + + int +-glusterd_shdsvc_init(glusterd_svc_t *svc); ++glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, ++ glusterd_svc_proc_t *svc_proc); + + int + glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags); +@@ -27,4 +35,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags); + + int + glusterd_shdsvc_reconfigure(); ++ ++int ++glusterd_shdsvc_restart(); ++ ++int ++glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 54a7bd1..943b1c6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -748,6 +748,16 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv) + } + } + ++ if (glusterd_is_shd_compatible_volume(volinfo)) { ++ svc = &(volinfo->shd.svc); ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, ++ "Failed " ++ "to stop shd daemon service"); ++ } ++ } ++ + if (glusterd_is_gfproxyd_enabled(volinfo)) { + svc = &(volinfo->gfproxyd.svc); + ret = svc->stop(svc, SIGTERM); +@@ -775,7 +785,7 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv) + } + + /*Reconfigure all daemon services upon peer detach*/ +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +index 56bab07..1da4076 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +@@ -366,6 +366,7 @@ int + glusterd_snapdsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -376,7 +377,7 @@ glusterd_snapdsvc_restart() + conf = this->private; + GF_ASSERT(conf); + +- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume snapd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c +index f5ecde7..69d4cf4 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-statedump.c ++++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c +@@ -202,9 +202,6 @@ glusterd_dump_priv(xlator_t *this) + gf_proc_dump_build_key(key, "glusterd", "ping-timeout"); + gf_proc_dump_write(key, "%d", priv->ping_timeout); + +- gf_proc_dump_build_key(key, "glusterd", "shd.online"); +- gf_proc_dump_write(key, "%d", priv->shd_svc.online); +- + gf_proc_dump_build_key(key, "glusterd", "nfs.online"); + gf_proc_dump_write(key, "%d", priv->nfs_svc.online); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index ca19a75..e42703c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -7,6 +7,7 @@ + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ ++#include <signal.h> + + #include <glusterfs/globals.h> + #include <glusterfs/run.h> +@@ -20,12 +21,14 @@ + #include "glusterd-bitd-svc.h" + #include "glusterd-tierd-svc.h" + #include "glusterd-tierd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-scrub-svc.h" + #include "glusterd-svc-helper.h" + #include <glusterfs/syscall.h> ++#include "glusterd-snapshot-utils.h" + + int +-glusterd_svcs_reconfigure() ++glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo) + { + int ret = 0; + xlator_t *this = THIS; +@@ -43,9 +46,11 @@ glusterd_svcs_reconfigure() + goto out; + + svc_name = "self-heald"; +- ret = glusterd_shdsvc_reconfigure(); +- if (ret) +- goto out; ++ if (volinfo) { ++ ret = glusterd_shdsvc_reconfigure(volinfo); ++ if (ret) ++ goto out; ++ } + + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; +@@ -69,7 +74,7 @@ out: + } + + int +-glusterd_svcs_stop() ++glusterd_svcs_stop(glusterd_volinfo_t *volinfo) + { + int ret = 0; + xlator_t *this = NULL; +@@ -85,14 +90,16 @@ glusterd_svcs_stop() + if (ret) + goto out; + +- ret = glusterd_svc_stop(&(priv->shd_svc), SIGTERM); +- if (ret) +- goto out; +- + ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); + if (ret) + goto out; + ++ if (volinfo) { ++ ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT); ++ if (ret) ++ goto out; ++ } ++ + ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM); + if (ret) + goto out; +@@ -121,12 +128,6 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + +- ret = conf->shd_svc.manager(&(conf->shd_svc), volinfo, PROC_START_NO_WAIT); +- if (ret == -EINVAL) +- ret = 0; +- if (ret) +- goto out; +- + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; + +@@ -143,6 +144,15 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + ++ if (volinfo) { ++ ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo, ++ PROC_START_NO_WAIT); ++ if (ret == -EINVAL) ++ ret = 0; ++ if (ret) ++ goto out; ++ } ++ + ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; +@@ -269,3 +279,678 @@ out: + GF_FREE(tmpvol); + return ret; + } ++ ++int ++glusterd_volume_svc_check_volfile_identical( ++ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) ++{ ++ char orgvol[PATH_MAX] = { ++ 0, ++ }; ++ char *tmpvol = NULL; ++ xlator_t *this = NULL; ++ int ret = -1; ++ int need_unlink = 0; ++ int tmp_fd = -1; ++ ++ this = THIS; ++ ++ GF_VALIDATE_OR_GOTO(this->name, this, out); ++ GF_VALIDATE_OR_GOTO(this->name, identical, out); ++ ++ /* 
This builds volfile for volume level dameons */ ++ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, ++ sizeof(orgvol)); ++ ++ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ tmp_fd = mkstemp(tmpvol); ++ if (tmp_fd < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, ++ "Unable to create temp file" ++ " %s:(%s)", ++ tmpvol, strerror(errno)); ++ ret = -1; ++ goto out; ++ } ++ ++ need_unlink = 1; ++ ++ ret = builder(volinfo, tmpvol, mode_dict); ++ if (ret) ++ goto out; ++ ++ ret = glusterd_check_files_identical(orgvol, tmpvol, identical); ++out: ++ if (need_unlink) ++ sys_unlink(tmpvol); ++ ++ if (tmpvol != NULL) ++ GF_FREE(tmpvol); ++ ++ if (tmp_fd >= 0) ++ sys_close(tmp_fd); ++ ++ return ret; ++} ++ ++int ++glusterd_volume_svc_check_topology_identical( ++ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) ++{ ++ char orgvol[PATH_MAX] = { ++ 0, ++ }; ++ char *tmpvol = NULL; ++ glusterd_conf_t *conf = NULL; ++ xlator_t *this = THIS; ++ int ret = -1; ++ int tmpclean = 0; ++ int tmpfd = -1; ++ ++ if ((!identical) || (!this) || (!this->private)) ++ goto out; ++ ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ /* This builds volfile for volume level dameons */ ++ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, ++ sizeof(orgvol)); ++ /* Create the temporary volfile */ ++ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ tmpfd = mkstemp(tmpvol); ++ if (tmpfd < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, ++ "Unable to create temp file" ++ " %s:(%s)", ++ tmpvol, strerror(errno)); ++ ret = -1; ++ goto out; ++ } ++ ++ tmpclean = 1; /* SET the flag to unlink() tmpfile */ ++ ++ ret = builder(volinfo, tmpvol, mode_dict); ++ if (ret) ++ goto out; ++ ++ /* Compare the topology of volfiles */ ++ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); ++out: ++ if (tmpfd >= 0) ++ sys_close(tmpfd); ++ if (tmpclean) ++ sys_unlink(tmpvol); ++ if (tmpvol != NULL) ++ GF_FREE(tmpvol); ++ return ret; ++} ++ ++void * ++__gf_find_compatible_svc(gd_node_type daemon) ++{ ++ glusterd_svc_proc_t *svc_proc = NULL; ++ glusterd_svc_proc_t *return_proc = NULL; ++ glusterd_svc_t *parent_svc = NULL; ++ struct cds_list_head *svc_procs = NULL; ++ glusterd_conf_t *conf = NULL; ++ int pid = -1; ++ ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ ++ if (daemon == GD_NODE_SHD) { ++ svc_procs = &conf->shd_procs; ++ if (!svc_procs) ++ goto out; ++ } ++ ++ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) ++ { ++ parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t, ++ mux_svc); ++ if (!return_proc) ++ return_proc = svc_proc; ++ ++ /* If there is an already running shd daemons, select it. Otehrwise ++ * select the first one. ++ */ ++ if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid)) ++ return (void *)svc_proc; ++ /* ++ * Logic to select one process goes here. Currently there is only one ++ * shd_proc. 
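The lookup above relies on the two-level bookkeeping this series introduces: conf->shd_procs is a list of glusterd_svc_proc_t objects, one per running shd process, and each of those chains the per-volume glusterd_svc_t entries through their mux_svc member. An illustrative walk of that structure (sketch only):

    /* Sketch: report how many volumes each shd process is hosting. */
    glusterd_svc_proc_t *svc_proc = NULL;
    glusterd_svc_t *svc = NULL;
    int count = 0;

    cds_list_for_each_entry(svc_proc, &conf->shd_procs, svc_proc_list)
    {
        count = 0;
        cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc)
            count++;
        gf_msg_debug(THIS->name, 0, "shd process hosts %d volume(s)", count);
    }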
So selecting the first one; ++ */ ++ } ++out: ++ return return_proc; ++} ++ ++glusterd_svc_proc_t * ++glusterd_svcprocess_new() ++{ ++ glusterd_svc_proc_t *new_svcprocess = NULL; ++ ++ new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess), ++ gf_gld_mt_glusterd_svc_proc_t); ++ ++ if (!new_svcprocess) ++ return NULL; ++ ++ CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list); ++ CDS_INIT_LIST_HEAD(&new_svcprocess->svcs); ++ new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify; ++ return new_svcprocess; ++} ++ ++int ++glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) ++{ ++ int ret = -1; ++ glusterd_svc_proc_t *mux_proc = NULL; ++ glusterd_conn_t *mux_conn = NULL; ++ glusterd_conf_t *conf = NULL; ++ glusterd_svc_t *parent_svc = NULL; ++ int pid = -1; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); ++ conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ if (!svc->inited) { ++ if (gf_is_service_running(svc->proc.pidfile, &pid)) { ++ /* Just connect is required, but we don't know what happens ++ * during the disconnect. So better to reattach. ++ */ ++ mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid); ++ } ++ ++ if (!mux_proc) { ++ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { ++ /* stale pid file, unlink it. */ ++ kill(pid, SIGTERM); ++ sys_unlink(svc->proc.pidfile); ++ } ++ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); ++ } ++ if (mux_proc) { ++ /* Take first entry from the process */ ++ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, ++ mux_svc); ++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); ++ mux_conn = &parent_svc->conn; ++ if (volinfo) ++ volinfo->shd.attached = _gf_true; ++ } else { ++ mux_proc = glusterd_svcprocess_new(); ++ if (!mux_proc) { ++ ret = -1; ++ goto unlock; ++ } ++ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); ++ } ++ svc->svc_proc = mux_proc; ++ cds_list_del_init(&svc->mux_svc); ++ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); ++ ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc); ++ if (ret) { ++ pthread_mutex_unlock(&conf->attach_lock); ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, ++ "Failed to init shd " ++ "service"); ++ goto out; ++ } ++ gf_msg_debug(THIS->name, 0, "shd service initialized"); ++ svc->inited = _gf_true; ++ } ++ ret = 0; ++ } ++unlock: ++ pthread_mutex_unlock(&conf->attach_lock); ++out: ++ return ret; ++} ++ ++void * ++__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid) ++{ ++ glusterd_svc_proc_t *svc_proc = NULL; ++ struct cds_list_head *svc_procs = NULL; ++ glusterd_svc_t *svc = NULL; ++ pid_t mux_pid = -1; ++ glusterd_conf_t *conf = NULL; ++ ++ conf = THIS->private; ++ if (!conf) ++ return NULL; ++ ++ if (daemon == GD_NODE_SHD) { ++ svc_procs = &conf->shd_procs; ++ if (!svc_proc) ++ return NULL; ++ } /* Can be moved to switch when mux is implemented for other daemon; */ ++ ++ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) ++ { ++ cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc) ++ { ++ if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) { ++ if (mux_pid == pid) { ++ /*TODO ++ * inefficient loop, but at the moment, there is only ++ * one shd. 
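A detail worth noting in glusterd_shd_svc_mux_init() above: when a volume joins an existing shd process, its pidfile is created as a hard link to the parent service's pidfile rather than a copy, so every attached volume always reports the one shared pid, and a stale pidfile pointing at a dead process is killed off and unlinked before any reuse is considered. The core of the sharing step is simply:

    /* Sketch of the pidfile-sharing step used above: alias the new
     * volume's pidfile to the parent service's pidfile. */
    if (parent_svc) {
        sys_link(parent_svc->proc.pidfile, svc->proc.pidfile);
        mux_conn = &parent_svc->conn; /* reuse the existing connection */
    }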
++ */ ++ return svc_proc; ++ } ++ } ++ } ++ } ++ return NULL; ++} ++ ++static int32_t ++my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) ++{ ++ call_frame_t *frame = v_frame; ++ xlator_t *this = NULL; ++ glusterd_conf_t *conf = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", frame, out); ++ this = frame->this; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ GF_ATOMIC_DEC(conf->blockers); ++ ++ STACK_DESTROY(frame->root); ++out: ++ return 0; ++} ++ ++static int32_t ++glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, ++ void *v_frame) ++{ ++ call_frame_t *frame = v_frame; ++ glusterd_volinfo_t *volinfo = NULL; ++ glusterd_shdsvc_t *shd = NULL; ++ glusterd_svc_t *svc = frame->cookie; ++ glusterd_svc_t *parent_svc = NULL; ++ glusterd_svc_proc_t *mux_proc = NULL; ++ glusterd_conf_t *conf = NULL; ++ int *flag = (int *)frame->local; ++ xlator_t *this = THIS; ++ int pid = -1; ++ int ret = -1; ++ gf_getspec_rsp rsp = { ++ 0, ++ }; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", frame, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ ++ frame->local = NULL; ++ frame->cookie = NULL; ++ ++ if (!strcmp(svc->name, "glustershd")) { ++ /* Get volinfo->shd from svc object */ ++ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); ++ if (!shd) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, ++ "Failed to get shd object " ++ "from shd service"); ++ goto out; ++ } ++ ++ /* Get volinfo from shd */ ++ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); ++ if (!volinfo) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Failed to get volinfo from " ++ "from shd"); ++ goto out; ++ } ++ } ++ ++ if (!iov) { ++ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, ++ "iov is NULL"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); ++ if (ret < 0) { ++ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, ++ "XDR decoding error"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (rsp.op_ret == 0) { ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ if (!strcmp(svc->name, "glustershd")) { ++ mux_proc = svc->svc_proc; ++ if (mux_proc && ++ !gf_is_service_running(svc->proc.pidfile, &pid)) { ++ /* ++ * When svc's are restarting, there is a chance that the ++ * attached svc might not have updated it's pid. Because ++ * it was at connection stage. So in that case, we need ++ * to retry the pid file copy. ++ */ ++ parent_svc = cds_list_entry(mux_proc->svcs.next, ++ glusterd_svc_t, mux_svc); ++ if (parent_svc) ++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); ++ } ++ } ++ svc->online = _gf_true; ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "svc %s of volume %s attached successfully to pid %d", svc->name, ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "svc %s of volume %s failed to " ++ "attach to pid %d. 
Starting a new process", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ if (!strcmp(svc->name, "glustershd")) { ++ glusterd_recover_shd_attach_failure(volinfo, svc, *flag); ++ } ++ } ++out: ++ if (flag) { ++ GF_FREE(flag); ++ } ++ GF_ATOMIC_DEC(conf->blockers); ++ STACK_DESTROY(frame->root); ++ return 0; ++} ++ ++extern size_t ++build_volfile_path(char *volume_id, char *path, size_t path_len, ++ char *trusted_str); ++ ++int ++__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, ++ struct rpc_clnt *rpc, char *volfile_id, ++ int op) ++{ ++ int ret = -1; ++ struct iobuf *iobuf = NULL; ++ struct iobref *iobref = NULL; ++ struct iovec iov = { ++ 0, ++ }; ++ char path[PATH_MAX] = { ++ '\0', ++ }; ++ struct stat stbuf = { ++ 0, ++ }; ++ int32_t spec_fd = -1; ++ size_t file_len = -1; ++ char *volfile_content = NULL; ++ ssize_t req_size = 0; ++ call_frame_t *frame = NULL; ++ gd1_mgmt_brick_op_req brick_req; ++ void *req = &brick_req; ++ void *errlbl = &&err; ++ struct rpc_clnt_connection *conn; ++ xlator_t *this = THIS; ++ glusterd_conf_t *conf = THIS->private; ++ extern struct rpc_clnt_program gd_brick_prog; ++ fop_cbk_fn_t cbkfn = my_callback; ++ ++ if (!rpc) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL, ++ "called with null rpc"); ++ return -1; ++ } ++ ++ conn = &rpc->conn; ++ if (!conn->connected || conn->disconnected) { ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED, ++ "not connected yet"); ++ return -1; ++ } ++ ++ brick_req.op = op; ++ brick_req.name = volfile_id; ++ brick_req.input.input_val = NULL; ++ brick_req.input.input_len = 0; ++ ++ frame = create_frame(this, this->ctx->pool); ++ if (!frame) { ++ goto *errlbl; ++ } ++ ++ if (op == GLUSTERD_SVC_ATTACH) { ++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL); ++ ++ ret = sys_stat(path, &stbuf); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "Unable to stat %s (%s)", path, strerror(errno)); ++ ret = -EINVAL; ++ goto *errlbl; ++ } ++ ++ file_len = stbuf.st_size; ++ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char); ++ if (!volfile_content) { ++ ret = -ENOMEM; ++ goto *errlbl; ++ } ++ spec_fd = open(path, O_RDONLY); ++ if (spec_fd < 0) { ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "failed to read volfile %s", path); ++ ret = -EIO; ++ goto *errlbl; ++ } ++ ret = sys_read(spec_fd, volfile_content, file_len); ++ if (ret == file_len) { ++ brick_req.input.input_val = volfile_content; ++ brick_req.input.input_len = file_len; ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "read failed on path %s. File size=%" GF_PRI_SIZET ++ "read size=%d", ++ path, file_len, ret); ++ ret = -EIO; ++ goto *errlbl; ++ } ++ ++ frame->cookie = svc; ++ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int); ++ *((int *)frame->local) = flags; ++ cbkfn = glusterd_svc_attach_cbk; ++ } ++ ++ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); ++ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); ++ if (!iobuf) { ++ goto *errlbl; ++ } ++ errlbl = &&maybe_free_iobuf; ++ ++ iov.iov_base = iobuf->ptr; ++ iov.iov_len = iobuf_pagesize(iobuf); ++ ++ iobref = iobref_new(); ++ if (!iobref) { ++ goto *errlbl; ++ } ++ errlbl = &&free_iobref; ++ ++ iobref_add(iobref, iobuf); ++ /* ++ * Drop our reference to the iobuf. The iobref should already have ++ * one after iobref_add, so when we unref that we'll free the iobuf as ++ * well. This allows us to pass just the iobref as frame->local. 
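The attach/detach request above deliberately reuses the existing brick-op RPC rather than defining a new program: op selects GLUSTERD_SVC_ATTACH or GLUSTERD_SVC_DETACH, name carries the volfile id, and for an attach the entire volfile text is shipped inline in the input blob so the target daemon can load the new graph without fetching it. Schematically (field names as in the code; the volfile-id format shown is an assumption):

    gd1_mgmt_brick_op_req brick_req = {
        .op = GLUSTERD_SVC_ATTACH,         /* or GLUSTERD_SVC_DETACH */
        .name = volfile_id,                /* e.g. "shd/<volname>" (assumed) */
        .input = {.input_val = volfile_content, /* full volfile, attach only */
                  .input_len = file_len},
    };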
++ */ ++ iobuf_unref(iobuf); ++ /* Set the pointer to null so we don't free it on a later error. */ ++ iobuf = NULL; ++ ++ /* Create the xdr payload */ ++ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); ++ if (ret == -1) { ++ goto *errlbl; ++ } ++ iov.iov_len = ret; ++ ++ /* Send the msg */ ++ GF_ATOMIC_INC(conf->blockers); ++ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, ++ iobref, frame, NULL, 0, NULL, 0, NULL); ++ GF_FREE(volfile_content); ++ if (spec_fd >= 0) ++ sys_close(spec_fd); ++ return ret; ++ ++free_iobref: ++ iobref_unref(iobref); ++maybe_free_iobuf: ++ if (iobuf) { ++ iobuf_unref(iobuf); ++ } ++err: ++ GF_FREE(volfile_content); ++ if (spec_fd >= 0) ++ sys_close(spec_fd); ++ if (frame) ++ STACK_DESTROY(frame->root); ++ return -1; ++} ++ ++int ++glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags) ++{ ++ glusterd_conf_t *conf = THIS->private; ++ int ret = -1; ++ int tries; ++ rpc_clnt_t *rpc = NULL; ++ ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); ++ ++ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO, ++ "adding svc %s (volume=%s) to existing " ++ "process with pid %d", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ ++ rpc = rpc_clnt_ref(svc->conn.rpc); ++ for (tries = 15; tries > 0; --tries) { ++ if (rpc) { ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ ret = __glusterd_send_svc_configure_req( ++ svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ if (!ret) { ++ volinfo->shd.attached = _gf_true; ++ goto out; ++ } ++ } ++ /* ++ * It might not actually be safe to manipulate the lock ++ * like this, but if we don't then the connection can ++ * never actually complete and retries are useless. ++ * Unfortunately, all of the alternatives (e.g. doing ++ * all of this in a separate thread) are much more ++ * complicated and risky. ++ * TBD: see if there's a better way ++ */ ++ synclock_unlock(&conf->big_lock); ++ sleep(1); ++ synclock_lock(&conf->big_lock); ++ } ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, ++ "attach failed for %s(volume=%s)", svc->name, volinfo->volname); ++out: ++ if (rpc) ++ rpc_clnt_unref(rpc); ++ return ret; ++} ++ ++int ++glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig) ++{ ++ glusterd_conf_t *conf = THIS->private; ++ int ret = -1; ++ int tries; ++ rpc_clnt_t *rpc = NULL; ++ ++ GF_VALIDATE_OR_GOTO(THIS->name, conf, out); ++ GF_VALIDATE_OR_GOTO(THIS->name, svc, out); ++ GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); ++ ++ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO, ++ "removing svc %s (volume=%s) from existing " ++ "process with pid %d", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ ++ rpc = rpc_clnt_ref(svc->conn.rpc); ++ for (tries = 15; tries > 0; --tries) { ++ if (rpc) { ++ /*For detach there is no flags, and we are not using sig.*/ ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ ret = __glusterd_send_svc_configure_req(svc, 0, svc->conn.rpc, ++ svc->proc.volfileid, ++ GLUSTERD_SVC_DETACH); ++ } ++ pthread_mutex_unlock(&conf->attach_lock); ++ if (!ret) { ++ goto out; ++ } ++ } ++ /* ++ * It might not actually be safe to manipulate the lock ++ * like this, but if we don't then the connection can ++ * never actually complete and retries are useless. 
++ * Unfortunately, all of the alternatives (e.g. doing ++ * all of this in a separate thread) are much more ++ * complicated and risky. ++ * TBD: see if there's a better way ++ */ ++ synclock_unlock(&conf->big_lock); ++ sleep(1); ++ synclock_lock(&conf->big_lock); ++ } ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL, ++ "detach failed for %s(volume=%s)", svc->name, volinfo->volname); ++out: ++ if (rpc) ++ rpc_clnt_unref(rpc); ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +index cc98e78..5def246 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +@@ -16,10 +16,10 @@ + #include "glusterd-volgen.h" + + int +-glusterd_svcs_reconfigure(); ++glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo); + + int +-glusterd_svcs_stop(); ++glusterd_svcs_stop(glusterd_volinfo_t *vol); + + int + glusterd_svcs_manager(glusterd_volinfo_t *volinfo); +@@ -41,5 +41,41 @@ int + glusterd_svc_check_tier_topology_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); ++int ++glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict, ++ glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t, ++ gf_boolean_t *identical); ++int ++glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict, ++ glusterd_volinfo_t *volinfo, ++ glusterd_vol_graph_builder_t, ++ gf_boolean_t *identical); ++void ++glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, ++ char *volfile, size_t len); ++void * ++__gf_find_compatible_svc(gd_node_type daemon); ++ ++glusterd_svc_proc_t * ++glusterd_svcprocess_new(); ++ ++int ++glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc); ++ ++void * ++__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid); ++ ++int ++glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, ++ int flags); ++ ++int ++glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig); ++ ++int ++__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag, ++ struct rpc_clnt *rpc, char *volfile_id, ++ int op); + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +index 4cd4cea..f32dafc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +@@ -18,6 +18,7 @@ + #include "glusterd-conn-mgmt.h" + #include "glusterd-messages.h" + #include <glusterfs/syscall.h> ++#include "glusterd-shd-svc-helper.h" + + int + glusterd_svc_create_rundir(char *rundir) +@@ -167,68 +168,75 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) + GF_ASSERT(this); + + priv = this->private; +- GF_ASSERT(priv); ++ GF_VALIDATE_OR_GOTO("glusterd", priv, out); ++ GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ ++ pthread_mutex_lock(&priv->attach_lock); ++ { ++ if (glusterd_proc_is_running(&(svc->proc))) { ++ ret = 0; ++ goto unlock; ++ } + +- if (glusterd_proc_is_running(&(svc->proc))) { +- ret = 0; +- goto out; +- } ++ ret = sys_access(svc->proc.volfile, F_OK); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, ++ "Volfile %s is not present", svc->proc.volfile); ++ goto unlock; ++ } + +- ret = sys_access(svc->proc.volfile, F_OK); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, +- "Volfile %s is not present", svc->proc.volfile); 
+- goto out; +- } ++ runinit(&runner); + +- runinit(&runner); ++ if (this->ctx->cmd_args.valgrind) { ++ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", ++ svc->proc.logfile, svc->name); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ ret = -1; ++ goto unlock; ++ } + +- if (this->ctx->cmd_args.valgrind) { +- len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", +- svc->proc.logfile, svc->name); +- if ((len < 0) || (len >= PATH_MAX)) { +- ret = -1; +- goto out; ++ runner_add_args(&runner, "valgrind", "--leak-check=full", ++ "--trace-children=yes", "--track-origins=yes", ++ NULL); ++ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); + } + +- runner_add_args(&runner, "valgrind", "--leak-check=full", +- "--trace-children=yes", "--track-origins=yes", NULL); +- runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); +- } +- +- runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", +- svc->proc.volfileserver, "--volfile-id", +- svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", +- svc->proc.logfile, "-S", svc->conn.sockpath, NULL); ++ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", ++ svc->proc.volfileserver, "--volfile-id", ++ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", ++ svc->proc.logfile, "-S", svc->conn.sockpath, NULL); + +- if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, +- SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), +- &localtime_logging) == 0) { +- if (strcmp(localtime_logging, "enable") == 0) +- runner_add_arg(&runner, "--localtime-logging"); +- } +- if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, +- SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), &log_level) == 0) { +- snprintf(daemon_log_level, 30, "--log-level=%s", log_level); +- runner_add_arg(&runner, daemon_log_level); +- } ++ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), ++ &localtime_logging) == 0) { ++ if (strcmp(localtime_logging, "enable") == 0) ++ runner_add_arg(&runner, "--localtime-logging"); ++ } ++ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, ++ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), ++ &log_level) == 0) { ++ snprintf(daemon_log_level, 30, "--log-level=%s", log_level); ++ runner_add_arg(&runner, daemon_log_level); ++ } + +- if (cmdline) +- dict_foreach(cmdline, svc_add_args, (void *)&runner); ++ if (cmdline) ++ dict_foreach(cmdline, svc_add_args, (void *)&runner); + +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, +- "Starting %s service", svc->name); ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, ++ "Starting %s service", svc->name); + +- if (flags == PROC_START_NO_WAIT) { +- ret = runner_run_nowait(&runner); +- } else { +- synclock_unlock(&priv->big_lock); +- { +- ret = runner_run(&runner); ++ if (flags == PROC_START_NO_WAIT) { ++ ret = runner_run_nowait(&runner); ++ } else { ++ synclock_unlock(&priv->big_lock); ++ { ++ ret = runner_run(&runner); ++ } ++ synclock_lock(&priv->big_lock); + } +- synclock_lock(&priv->big_lock); + } +- ++unlock: ++ pthread_mutex_unlock(&priv->attach_lock); + out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + +@@ -281,7 +289,8 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + + glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir)); + +- if (!strcmp(server, "quotad")) /*quotad has different volfile name*/ ++ if (!strcmp(server, "quotad")) ++ /*quotad has different volfile name*/ + snprintf(volfile, len, "%s/%s.vol", dir, server); + else + snprintf(volfile, len, "%s/%s-server.vol", 
dir, server); +@@ -366,3 +375,138 @@ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event) + + return ret; + } ++ ++void ++glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, ++ char *volfile, size_t len) ++{ ++ GF_ASSERT(len == PATH_MAX); ++ ++ if (!strcmp(server, "glustershd")) { ++ glusterd_svc_build_shd_volfile_path(vol, volfile, len); ++ } ++} ++ ++int ++glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc, ++ rpc_clnt_event_t event) ++{ ++ int ret = 0; ++ glusterd_svc_t *svc = NULL; ++ glusterd_svc_t *tmp = NULL; ++ xlator_t *this = NULL; ++ gf_boolean_t need_logging = _gf_false; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (!mux_proc) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, ++ "Failed to get the svc proc data"); ++ return -1; ++ } ++ ++ /* Currently this function was used for shd svc, if this function is ++ * using for another svc, change ths glustershd reference. We can get ++ * the svc name from any of the attached svc's ++ */ ++ switch (event) { ++ case RPC_CLNT_CONNECT: ++ gf_msg_debug(this->name, 0, ++ "glustershd has connected with glusterd."); ++ gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd"); ++ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc) ++ { ++ if (svc->online) ++ continue; ++ svc->online = _gf_true; ++ } ++ break; ++ ++ case RPC_CLNT_DISCONNECT: ++ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc) ++ { ++ if (svc->online) { ++ if (!need_logging) ++ need_logging = _gf_true; ++ svc->online = _gf_false; ++ } ++ } ++ if (need_logging) { ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED, ++ "glustershd has disconnected from glusterd."); ++ gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd"); ++ } ++ break; ++ ++ default: ++ gf_msg_trace(this->name, 0, "got some other RPC event %d", event); ++ break; ++ } ++ ++ return ret; ++} ++ ++int ++glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, ++ char *sockpath, int frame_timeout, ++ glusterd_muxsvc_conn_notify_t notify) ++{ ++ int ret = -1; ++ dict_t *options = NULL; ++ struct rpc_clnt *rpc = NULL; ++ xlator_t *this = THIS; ++ glusterd_svc_t *svc = NULL; ++ ++ options = dict_new(); ++ if (!this || !options) ++ goto out; ++ ++ svc = cds_list_entry(conn, glusterd_svc_t, conn); ++ if (!svc) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, ++ "Failed to get the service"); ++ goto out; ++ } ++ ++ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout); ++ if (ret) ++ goto out; ++ ++ ret = dict_set_int32n(options, "transport.socket.ignore-enoent", ++ SLEN("transport.socket.ignore-enoent"), 1); ++ if (ret) ++ goto out; ++ ++ /* @options is free'd by rpc_transport when destroyed */ ++ rpc = rpc_clnt_new(options, this, (char *)svc->name, 16); ++ if (!rpc) { ++ ret = -1; ++ goto out; ++ } ++ ++ ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify, ++ mux_proc); ++ if (ret) ++ goto out; ++ ++ ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath); ++ if (ret < 0) ++ goto out; ++ else ++ ret = 0; ++ ++ conn->frame_timeout = frame_timeout; ++ conn->rpc = rpc; ++ mux_proc->notify = notify; ++out: ++ if (options) ++ dict_unref(options); ++ if (ret) { ++ if (rpc) { ++ rpc_clnt_unref(rpc); ++ rpc = NULL; ++ } ++ } ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +index c850bfd..fbc5225 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +@@ -13,9 +13,12 @@ + + #include "glusterd-proc-mgmt.h" + #include "glusterd-conn-mgmt.h" ++#include "glusterd-rcu.h" + + struct glusterd_svc_; ++ + typedef struct glusterd_svc_ glusterd_svc_t; ++typedef struct glusterd_svc_proc_ glusterd_svc_proc_t; + + typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc); + +@@ -25,6 +28,17 @@ typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags); + typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig); + typedef int (*glusterd_svc_reconfigure_t)(void *data); + ++typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc, ++ rpc_clnt_event_t event); ++ ++struct glusterd_svc_proc_ { ++ struct cds_list_head svc_proc_list; ++ struct cds_list_head svcs; ++ glusterd_muxsvc_conn_notify_t notify; ++ rpc_clnt_t *rpc; ++ void *data; ++}; ++ + struct glusterd_svc_ { + char name[NAME_MAX]; + glusterd_conn_t conn; +@@ -35,6 +49,8 @@ struct glusterd_svc_ { + gf_boolean_t online; + gf_boolean_t inited; + glusterd_svc_reconfigure_t reconfigure; ++ glusterd_svc_proc_t *svc_proc; ++ struct cds_list_head mux_svc; + }; + + int +@@ -69,4 +85,15 @@ glusterd_svc_reconfigure(int (*create_volfile)()); + int + glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event); + ++int ++glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn, ++ rpc_clnt_event_t event); ++ ++int ++glusterd_proc_get_pid(glusterd_proc_t *proc); ++ ++int ++glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, ++ char *sockpath, int frame_timeout, ++ glusterd_muxsvc_conn_notify_t notify); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c +index 4dc0d44..23a9592 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tier.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c +@@ -27,6 +27,7 @@ + #include "glusterd-messages.h" + #include "glusterd-mgmt.h" + #include "glusterd-syncop.h" ++#include "glusterd-shd-svc-helper.h" + + #include <sys/wait.h> + #include <dlfcn.h> +@@ -615,7 +616,7 @@ glusterd_op_remove_tier_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + if (cmd == GF_DEFRAG_CMD_DETACH_START && + volinfo->status == GLUSTERD_STATUS_STARTED) { +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +index 04ceec5..ab463f1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +@@ -83,7 +83,6 @@ glusterd_tierdsvc_init(void *data) + goto out; + + notify = glusterd_svc_common_rpc_notify; +- glusterd_store_perform_node_state_store(volinfo); + + volinfo->type = GF_CLUSTER_TYPE_TIER; + +@@ -395,6 +394,7 @@ int + glusterd_tierdsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -405,7 +405,7 @@ glusterd_tierdsvc_restart() + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + +- cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) ++ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) + { + /* Start per volume tierd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED && +diff --git 
a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 52b83ec..ef664c2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -61,6 +61,7 @@ + #include "glusterd-server-quorum.h" + #include <glusterfs/quota-common-utils.h> + #include <glusterfs/common-utils.h> ++#include "glusterd-shd-svc-helper.h" + + #include "xdr-generic.h" + #include <sys/resource.h> +@@ -583,13 +584,17 @@ glusterd_volinfo_t * + glusterd_volinfo_unref(glusterd_volinfo_t *volinfo) + { + int refcnt = -1; ++ glusterd_conf_t *conf = THIS->private; + +- pthread_mutex_lock(&volinfo->reflock); ++ pthread_mutex_lock(&conf->volume_lock); + { +- refcnt = --volinfo->refcnt; ++ pthread_mutex_lock(&volinfo->reflock); ++ { ++ refcnt = --volinfo->refcnt; ++ } ++ pthread_mutex_unlock(&volinfo->reflock); + } +- pthread_mutex_unlock(&volinfo->reflock); +- ++ pthread_mutex_unlock(&conf->volume_lock); + if (!refcnt) { + glusterd_volinfo_delete(volinfo); + return NULL; +@@ -661,6 +666,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) + glusterd_snapdsvc_build(&new_volinfo->snapd.svc); + glusterd_tierdsvc_build(&new_volinfo->tierd.svc); + glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc); ++ glusterd_shdsvc_build(&new_volinfo->shd.svc); + + pthread_mutex_init(&new_volinfo->reflock, NULL); + *volinfo = glusterd_volinfo_ref(new_volinfo); +@@ -1026,11 +1032,11 @@ glusterd_volinfo_delete(glusterd_volinfo_t *volinfo) + gf_store_handle_destroy(volinfo->snapd.handle); + + glusterd_auth_cleanup(volinfo); ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); + + pthread_mutex_destroy(&volinfo->reflock); + GF_FREE(volinfo); + ret = 0; +- + out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +@@ -3619,6 +3625,7 @@ glusterd_spawn_daemons(void *opaque) + ret = glusterd_snapdsvc_restart(); + ret = glusterd_tierdsvc_restart(); + ret = glusterd_gfproxydsvc_restart(); ++ ret = glusterd_shdsvc_restart(); + return ret; + } + +@@ -4569,6 +4576,9 @@ glusterd_delete_stale_volume(glusterd_volinfo_t *stale_volinfo, + svc = &(stale_volinfo->snapd.svc); + (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); + } ++ svc = &(stale_volinfo->shd.svc); ++ (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); ++ + (void)glusterd_volinfo_remove(stale_volinfo); + + return 0; +@@ -4683,6 +4693,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + glusterd_volinfo_unref(old_volinfo); + } + ++ ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, ++ "Failed to store " ++ "volinfo for volume %s", ++ new_volinfo->volname); ++ goto out; ++ } ++ + if (glusterd_is_volume_started(new_volinfo)) { + (void)glusterd_start_bricks(new_volinfo); + if (glusterd_is_snapd_enabled(new_volinfo)) { +@@ -4691,15 +4710,10 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + } + } +- } +- +- ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, +- "Failed to store " +- "volinfo for volume %s", +- new_volinfo->volname); +- goto out; ++ svc = &(new_volinfo->shd.svc); ++ if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { ++ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); ++ } + } + + ret = glusterd_create_volfiles_and_notify_services(new_volinfo); +@@ -5174,9 
+5188,7 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + glusterd_svc_build_pidfile_path(server, priv->rundir, pidfile, + sizeof(pidfile)); + +- if (strcmp(server, priv->shd_svc.name) == 0) +- svc = &(priv->shd_svc); +- else if (strcmp(server, priv->nfs_svc.name) == 0) ++ if (strcmp(server, priv->nfs_svc.name) == 0) + svc = &(priv->nfs_svc); + else if (strcmp(server, priv->quotad_svc.name) == 0) + svc = &(priv->quotad_svc); +@@ -5207,9 +5219,6 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + if (!strcmp(server, priv->nfs_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "NFS Server", + SLEN("NFS Server")); +- else if (!strcmp(server, priv->shd_svc.name)) +- ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", +- SLEN("Self-heal Daemon")); + else if (!strcmp(server, priv->quotad_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "Quota Daemon", + SLEN("Quota Daemon")); +@@ -8773,6 +8782,21 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid) + "to stop snapd daemon service"); + } + } ++ ++ if (glusterd_is_shd_compatible_volume(volinfo)) { ++ /* ++ * Sending stop request for all volumes. So it is fine ++ * to send stop for mux shd ++ */ ++ svc = &(volinfo->shd.svc); ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, ++ "Failed " ++ "to stop shd daemon service"); ++ } ++ } ++ + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->stop(svc, SIGTERM); +@@ -8798,7 +8822,7 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid) + } + + /* Reconfigure all daemon services upon peer detach */ +- ret = glusterd_svcs_reconfigure(); ++ ret = glusterd_svcs_reconfigure(NULL); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); +@@ -14350,3 +14374,74 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo) + return _gf_true; + return _gf_false; + } ++ ++int32_t ++glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, ++ int32_t count) ++{ ++ int ret = -1; ++ int32_t pid = -1; ++ int32_t brick_online = -1; ++ char key[64] = {0}; ++ int keylen; ++ char *pidfile = NULL; ++ xlator_t *this = NULL; ++ char *uuid_str = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO(THIS->name, this, out); ++ ++ GF_VALIDATE_OR_GOTO(this->name, volinfo, out); ++ GF_VALIDATE_OR_GOTO(this->name, dict, out); ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.hostname", count); ++ ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", ++ SLEN("Self-heal Daemon")); ++ if (ret) ++ goto out; ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.path", count); ++ uuid_str = gf_strdup(uuid_utoa(MY_UUID)); ++ if (!uuid_str) { ++ ret = -1; ++ goto out; ++ } ++ ret = dict_set_dynstrn(dict, key, keylen, uuid_str); ++ if (ret) ++ goto out; ++ uuid_str = NULL; ++ ++ /* shd doesn't have a port. but the cli needs a port key with ++ * a zero value to parse. 
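For reference, glusterd_add_shd_to_dict() presents the self-heal daemon to the CLI as a pseudo-brick; for slot N it emits the following keys, with values taken from the surrounding code:

    brick<N>.hostname = "Self-heal Daemon"
    brick<N>.path     = <node uuid string>
    brick<N>.port     = 0   /* shd has no port; the CLI still expects the key */
    brick<N>.pid      = <shd pid, or -1 when not running>
    brick<N>.status   = <non-zero if the pidfile maps to a live process>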
++ * */ ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.port", count); ++ ret = dict_set_int32n(dict, key, keylen, 0); ++ if (ret) ++ goto out; ++ ++ pidfile = volinfo->shd.svc.proc.pidfile; ++ ++ brick_online = gf_is_service_running(pidfile, &pid); ++ ++ /* If shd is not running, then don't print the pid */ ++ if (!brick_online) ++ pid = -1; ++ keylen = snprintf(key, sizeof(key), "brick%d.pid", count); ++ ret = dict_set_int32n(dict, key, keylen, pid); ++ if (ret) ++ goto out; ++ ++ keylen = snprintf(key, sizeof(key), "brick%d.status", count); ++ ret = dict_set_int32n(dict, key, keylen, brick_online); ++ ++out: ++ if (uuid_str) ++ GF_FREE(uuid_str); ++ if (ret) ++ gf_msg(this ? this->name : "glusterd", GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SET_FAILED, ++ "Returning %d. adding values to dict failed", ret); ++ ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 9bf19a6..3647c34 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -876,4 +876,8 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo); + + char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); ++ ++int32_t ++glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, ++ int32_t count); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 1f53beb..324ec2f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -36,6 +36,7 @@ + #include "glusterd-svc-mgmt.h" + #include "glusterd-svc-helper.h" + #include "glusterd-snapd-svc-helper.h" ++#include "glusterd-shd-svc-helper.h" + #include "glusterd-gfproxyd-svc-helper.h" + + struct gd_validate_reconf_opts { +@@ -4845,7 +4846,7 @@ volgen_get_shd_key(int type) + static int + volgen_set_shd_key_enable(dict_t *set_dict, const int type) + { +- int ret = -1; ++ int ret = 0; + + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: +@@ -5136,24 +5137,15 @@ out: + static int + build_shd_volume_graph(xlator_t *this, volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, dict_t *mod_dict, +- dict_t *set_dict, gf_boolean_t graph_check, +- gf_boolean_t *valid_config) ++ dict_t *set_dict, gf_boolean_t graph_check) + { + volgen_graph_t cgraph = {0}; + int ret = 0; + int clusters = -1; + +- if (!graph_check && (volinfo->status != GLUSTERD_STATUS_STARTED)) +- goto out; +- + if (!glusterd_is_shd_compatible_volume(volinfo)) + goto out; + +- /* Shd graph is valid only when there is at least one +- * replica/disperse volume is present +- */ +- *valid_config = _gf_true; +- + ret = prepare_shd_volume_options(volinfo, mod_dict, set_dict); + if (ret) + goto out; +@@ -5183,19 +5175,16 @@ out: + } + + int +-build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict) ++build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, ++ dict_t *mod_dict) + { +- glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; +- glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + int ret = 0; +- gf_boolean_t valid_config = _gf_false; + xlator_t *iostxl = NULL; + gf_boolean_t graph_check = _gf_false; + + this = THIS; +- priv = this->private; + + set_dict = dict_new(); + if (!set_dict) { +@@ -5205,26 +5194,18 @@ build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict) + + if (mod_dict) + graph_check = dict_get_str_boolean(mod_dict, "graph-check", 0); +- iostxl = volgen_graph_add_as(graph, "debug/io-stats", "glustershd"); ++ iostxl = 
volgen_graph_add_as(graph, "debug/io-stats", volinfo->volname); + if (!iostxl) { + ret = -1; + goto out; + } + +- cds_list_for_each_entry(voliter, &priv->volumes, vol_list) +- { +- ret = build_shd_volume_graph(this, graph, voliter, mod_dict, set_dict, +- graph_check, &valid_config); +- ret = dict_reset(set_dict); +- if (ret) +- goto out; +- } ++ ret = build_shd_volume_graph(this, graph, volinfo, mod_dict, set_dict, ++ graph_check); + + out: + if (set_dict) + dict_unref(set_dict); +- if (!valid_config) +- ret = -EINVAL; + return ret; + } + +@@ -6541,6 +6522,10 @@ glusterd_create_volfiles(glusterd_volinfo_t *volinfo) + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Could not generate gfproxy volfiles"); + ++ ret = glusterd_shdsvc_create_volfile(volinfo); ++ if (ret) ++ gf_log(this->name, GF_LOG_ERROR, "Could not generate shd volfiles"); ++ + dict_del_sizen(volinfo->dict, "skip-CLIOT"); + + out: +@@ -6621,7 +6606,7 @@ validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + ret = dict_set_int32_sizen(val_dict, "graph-check", 1); + if (ret) + goto out; +- ret = build_shd_graph(&graph, val_dict); ++ ret = build_shd_graph(volinfo, &graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + +@@ -6998,3 +6983,22 @@ gd_is_boolean_option(char *key) + + return _gf_false; + } ++ ++int ++glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, ++ dict_t *mode_dict) ++{ ++ int ret = -1; ++ volgen_graph_t graph = { ++ 0, ++ }; ++ ++ graph.type = GF_SHD; ++ ret = build_shd_graph(volinfo, &graph, mode_dict); ++ if (!ret) ++ ret = volgen_write_volfile(&graph, filename); ++ ++ volgen_graph_free(&graph); ++ ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h +index f9fc068..897d8fa 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h +@@ -66,6 +66,7 @@ typedef enum { + GF_REBALANCED = 1, + GF_QUOTAD, + GF_SNAPD, ++ GF_SHD, + } glusterd_graph_type_t; + + struct volgen_graph { +@@ -77,6 +78,8 @@ typedef struct volgen_graph volgen_graph_t; + + typedef int (*glusterd_graph_builder_t)(volgen_graph_t *graph, + dict_t *mod_dict); ++typedef int (*glusterd_vol_graph_builder_t)(glusterd_volinfo_t *, ++ char *filename, dict_t *mod_dict); + + #define COMPLETE_OPTION(key, completion, ret) \ + do { \ +@@ -201,7 +204,8 @@ void + glusterd_get_shd_filepath(char *filename); + + int +-build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict); ++build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, ++ dict_t *mod_dict); + + int + build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict); +@@ -313,4 +317,9 @@ glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo); + + int + glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename); ++ ++int ++glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, ++ dict_t *mode_dict); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 1ea8ba6..4c3ad50 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1940,7 +1940,7 @@ static int + glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, char **op_errstr) + { +- glusterd_conf_t *priv = NULL; ++ glusterd_svc_t *svc = NULL; + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + int ret = 0; + char msg[2408] = { +@@ -1950,7 +1950,6 
@@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + "Self-heal daemon is not running. " + "Check self-heal daemon log file."; + +- priv = this->private; + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret) { +@@ -1959,6 +1958,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + ++ svc = &(volinfo->shd.svc); + switch (heal_op) { + case GF_SHD_OP_INVALID: + case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/ +@@ -1988,7 +1988,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; +@@ -2009,7 +2009,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- if (!priv->shd_svc.online) { ++ if (!svc->online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index ff5af42..89afb9c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1533,14 +1533,6 @@ init(xlator_t *this) + exit(1); + } + +- ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_GLUSTERSHD_RUN_DIR); +- if (ret) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, +- "Unable to create " +- "glustershd running directory"); +- exit(1); +- } +- + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_NFS_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, +@@ -1815,6 +1807,9 @@ init(xlator_t *this) + CDS_INIT_LIST_HEAD(&conf->snapshots); + CDS_INIT_LIST_HEAD(&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD(&conf->brick_procs); ++ CDS_INIT_LIST_HEAD(&conf->shd_procs); ++ pthread_mutex_init(&conf->attach_lock, NULL); ++ pthread_mutex_init(&conf->volume_lock, NULL); + + pthread_mutex_init(&conf->mutex, NULL); + conf->rpc = rpc; +@@ -1895,7 +1890,6 @@ init(xlator_t *this) + glusterd_mgmt_v3_lock_timer_init(); + glusterd_txn_opinfo_dict_init(); + +- glusterd_shdsvc_build(&conf->shd_svc); + glusterd_nfssvc_build(&conf->nfs_svc); + glusterd_quotadsvc_build(&conf->quotad_svc); + glusterd_bitdsvc_build(&conf->bitd_svc); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index e858ce4..0ac6e63 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -28,6 +28,7 @@ + #include "glusterd-sm.h" + #include "glusterd-snapd-svc.h" + #include "glusterd-tierd-svc.h" ++#include "glusterd-shd-svc.h" + #include "glusterd-bitd-svc.h" + #include "glusterd1-xdr.h" + #include "protocol-common.h" +@@ -170,7 +171,6 @@ typedef struct { + char workdir[VALID_GLUSTERD_PATHMAX]; + char rundir[VALID_GLUSTERD_PATHMAX]; + rpcsvc_t *rpc; +- glusterd_svc_t shd_svc; + glusterd_svc_t nfs_svc; + glusterd_svc_t bitd_svc; + glusterd_svc_t scrub_svc; +@@ -179,6 +179,7 @@ typedef struct { + struct cds_list_head volumes; + struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ ++ struct cds_list_head shd_procs; /* List of shd processes */ + pthread_mutex_t xprt_lock; + struct list_head xprt_list; + pthread_mutex_t import_volumes; +@@ -219,6 +220,11 @@ typedef struct { + gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; ++ pthread_mutex_t attach_lock; /* Lock can be per process or 
a common one */ ++ pthread_mutex_t volume_lock; /* We release the big_lock from lot of places ++ which might lead the modification of volinfo ++ list. ++ */ + } glusterd_conf_t; + + typedef enum gf_brick_status { +@@ -498,6 +504,7 @@ struct glusterd_volinfo_ { + + glusterd_snapdsvc_t snapd; + glusterd_tierdsvc_t tierd; ++ glusterd_shdsvc_t shd; + glusterd_gfproxydsvc_t gfproxyd; + int32_t quota_xattr_version; + gf_boolean_t stage_deleted; /* volume has passed staging +@@ -624,7 +631,6 @@ typedef enum { + #define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" + #define GLUSTERD_BITD_RUN_DIR "/bitd" + #define GLUSTERD_SCRUB_RUN_DIR "/scrub" +-#define GLUSTERD_GLUSTERSHD_RUN_DIR "/glustershd" + #define GLUSTERD_NFS_RUN_DIR "/nfs" + #define GLUSTERD_QUOTAD_RUN_DIR "/quotad" + #define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR "/ss_brick" +@@ -680,6 +686,26 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args); + } \ + } while (0) + ++#define GLUSTERD_GET_SHD_RUNDIR(path, volinfo, priv) \ ++ do { \ ++ int32_t _shd_dir_len; \ ++ _shd_dir_len = snprintf(path, PATH_MAX, "%s/shd/%s", priv->rundir, \ ++ volinfo->volname); \ ++ if ((_shd_dir_len < 0) || (_shd_dir_len >= PATH_MAX)) { \ ++ path[0] = 0; \ ++ } \ ++ } while (0) ++ ++#define GLUSTERD_GET_SHD_PID_FILE(path, volinfo, priv) \ ++ do { \ ++ int32_t _shd_pid_len; \ ++ _shd_pid_len = snprintf(path, PATH_MAX, "%s/shd/%s-shd.pid", \ ++ priv->rundir, volinfo->volname); \ ++ if ((_shd_pid_len < 0) || (_shd_pid_len >= PATH_MAX)) { \ ++ path[0] = 0; \ ++ } \ ++ } while (0) ++ + #define GLUSTERD_GET_VOLUME_PID_DIR(path, volinfo, priv) \ + do { \ + int32_t _vol_pid_len; \ +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 2d75714..19f5175 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -46,7 +46,6 @@ client_fini_complete(xlator_t *this) + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + clnt_conf_t *conf = this->private; +- + if (!conf->destroy) + return 0; + +@@ -69,6 +68,11 @@ client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...) + return 0; + + return client_notify_dispatch(this, event, data); ++ ++ /* Please avoid any code that access xlator object here ++ * Because for a child down event, once we do the signal ++ * we will start cleanup. ++ */ + } + + int +@@ -105,6 +109,11 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...) + } + pthread_mutex_unlock(&ctx->notify_lock); + ++ /* Please avoid any code that access xlator object here ++ * Because for a child down event, once we do the signal ++ * we will start cleanup. ++ */ ++ + return ret; + } + +@@ -2272,6 +2281,7 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + { + xlator_t *this = NULL; + clnt_conf_t *conf = NULL; ++ gf_boolean_t is_parent_down = _gf_false; + int ret = 0; + + this = mydata; +@@ -2333,6 +2343,19 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + if (conf->portmap_err_logged) + conf->disconnect_err_logged = 1; + } ++ /* ++ * Once we complete the child down notification, ++ * There is a chance that the graph might get freed, ++ * So it is not safe to access any xlator contens ++ * So here we are checking whether the parent is down ++ * or not. 
++ */ ++ pthread_mutex_lock(&conf->lock); ++ { ++ is_parent_down = conf->parent_down; ++ } ++ pthread_mutex_unlock(&conf->lock); ++ + /* If the CHILD_DOWN event goes to parent xlator + multiple times, the logic of parent xlator notify + may get screwed up.. (eg. CHILD_MODIFIED event in +@@ -2340,6 +2363,12 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + to parent are genuine */ + ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN, + NULL); ++ if (is_parent_down) { ++ /* If parent is down, then there should not be any ++ * operation after a child down. ++ */ ++ goto out; ++ } + if (ret) + gf_msg(this->name, GF_LOG_INFO, 0, + PC_MSG_CHILD_DOWN_NOTIFY_FAILED, +-- +1.8.3.1 + diff --git a/SOURCES/0099-client-fini-return-fini-after-rpc-cleanup.patch b/SOURCES/0099-client-fini-return-fini-after-rpc-cleanup.patch new file mode 100644 index 0000000..5cff104 --- /dev/null +++ b/SOURCES/0099-client-fini-return-fini-after-rpc-cleanup.patch @@ -0,0 +1,119 @@ +From d79cb2cdff6fe8d962c9ac095a7541ddf500302b Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Mon, 1 Apr 2019 14:44:20 +0530 +Subject: [PATCH 099/124] client/fini: return fini after rpc cleanup + +There is a race condition in rpc_transport later +and client fini. + +Sequence of events to happen the race condition +1) When we want to destroy a graph, we send a parent down + event first +2) Once parent down received on a client xlator, we will + initiates a rpc disconnect +3) This will in turn generates a child down event. +4) When we process child down, we first do fini for + Every xlator +5) On successful return of fini, we delete the graph + +Here after the step 5, there is a chance that the fini +on client might not be finished. Because an rpc_tranpsort +ref can race with the above sequence. 
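In other words, fini must not return until the transport's final unref has fired the
completion callback. A minimal sketch of that handshake, using hypothetical names
(conf_t, fini_complete_cbk) that stand in for the clnt_conf_t machinery in the diff
below:

    #include <pthread.h>
    #include <stdbool.h>

    /* Hypothetical stand-in for clnt_conf_t; in the real patch the lock and
     * condition variable are initialised in init(). */
    typedef struct {
        pthread_mutex_t lock;
        pthread_cond_t fini_done_cond;
        bool fini_done;
    } conf_t;

    /* Runs when the transport drops its last ref (client_fini_complete
     * plays this role in the patch). */
    static void
    fini_complete_cbk(conf_t *conf)
    {
        pthread_mutex_lock(&conf->lock);
        conf->fini_done = true;
        pthread_cond_broadcast(&conf->fini_done_cond);
        pthread_mutex_unlock(&conf->lock);
    }

    static void
    fini_sketch(conf_t *conf)
    {
        conf->fini_done = false;
        /* ... initiate rpc cleanup and drop our own ref here ... */
        pthread_mutex_lock(&conf->lock);
        while (!conf->fini_done) /* loop guards against spurious wakeups */
            pthread_cond_wait(&conf->fini_done_cond, &conf->lock);
        pthread_mutex_unlock(&conf->lock);
        /* only now is it safe to free conf and tear down the graph */
    }
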
+ +So we have to wait till all rpc's are successfully freed +before returning the fini from client + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22468/ + +>Change-Id: I20145662d71fb837e448a4d3210d1fcb2855f2d4 +>fixes: bz#1659708 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I848bcfb9443467caed32bae0717244ab01b407fc +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167831 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/protocol/client/src/client.c | 25 ++++++++++++++++++++----- + xlators/protocol/client/src/client.h | 6 ++++++ + 2 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 19f5175..a372807 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -49,11 +49,12 @@ client_fini_complete(xlator_t *this) + if (!conf->destroy) + return 0; + +- this->private = NULL; +- +- pthread_spin_destroy(&conf->fd_lock); +- pthread_mutex_destroy(&conf->lock); +- GF_FREE(conf); ++ pthread_mutex_lock(&conf->lock); ++ { ++ conf->fini_completed = _gf_true; ++ pthread_cond_broadcast(&conf->fini_complete_cond); ++ } ++ pthread_mutex_unlock(&conf->lock); + + out: + return 0; +@@ -2721,6 +2722,7 @@ init(xlator_t *this) + goto out; + + pthread_mutex_init(&conf->lock, NULL); ++ pthread_cond_init(&conf->fini_complete_cond, NULL); + pthread_spin_init(&conf->fd_lock, 0); + INIT_LIST_HEAD(&conf->saved_fds); + +@@ -2779,6 +2781,7 @@ fini(xlator_t *this) + if (!conf) + return; + ++ conf->fini_completed = _gf_false; + conf->destroy = 1; + if (conf->rpc) { + /* cleanup the saved-frames before last unref */ +@@ -2786,6 +2789,18 @@ fini(xlator_t *this) + rpc_clnt_unref(conf->rpc); + } + ++ pthread_mutex_lock(&conf->lock); ++ { ++ while (!conf->fini_completed) ++ pthread_cond_wait(&conf->fini_complete_cond, &conf->lock); ++ } ++ pthread_mutex_unlock(&conf->lock); ++ ++ pthread_spin_destroy(&conf->fd_lock); ++ pthread_mutex_destroy(&conf->lock); ++ pthread_cond_destroy(&conf->fini_complete_cond); ++ GF_FREE(conf); ++ + /* Saved Fds */ + /* TODO: */ + +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index f12fa61..8dcd72f 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -235,6 +235,12 @@ typedef struct clnt_conf { + * up, disconnects can be + * logged + */ ++ ++ gf_boolean_t old_protocol; /* used only for old-protocol testing */ ++ pthread_cond_t fini_complete_cond; /* Used to wait till we finsh the fini ++ compltely, ie client_fini_complete ++ to return*/ ++ gf_boolean_t fini_completed; + } clnt_conf_t; + + typedef struct _client_fd_ctx { +-- +1.8.3.1 + diff --git a/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch b/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch new file mode 100644 index 0000000..0eb4b02 --- /dev/null +++ b/SOURCES/0100-clnt-rpc-ref-leak-during-disconnect.patch @@ -0,0 +1,179 @@ +From 4d95e271a9042bf2d789a4d900ad263b6ea47681 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Wed, 23 Jan 2019 21:55:01 +0530 +Subject: [PATCH 100/124] clnt/rpc: ref leak during disconnect. + +During disconnect cleanup, we are not cancelling reconnect +timer, which causes a ref leak each time when a disconnect +happen. 
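The pattern the fix applies can be sketched in isolation. The names below (clnt_t,
timer_cancel, clnt_unref) are simplified stand-ins for rpc_clnt_t,
gf_timer_call_cancel and rpc_clnt_unref, not the actual code:

    #include <stdbool.h>
    #include <stddef.h>

    struct timer_event;
    typedef struct {
        struct timer_event *reconnect; /* armed while holding a ref on the client */
    } conn_t;
    typedef struct { conn_t conn; } clnt_t;

    extern int timer_cancel(struct timer_event *ev); /* assumed: 0 on success */
    extern void clnt_unref(clnt_t *clnt);

    static void
    conn_cleanup_sketch(clnt_t *clnt)
    {
        conn_t *conn = &clnt->conn;
        bool drop_reconnect_ref = false;

        if (conn->reconnect) {
            /* A successful cancel means the callback will never run, so the
             * ref taken when the timer was armed is still ours to release. */
            if (timer_cancel(conn->reconnect) == 0)
                drop_reconnect_ref = true;
            conn->reconnect = NULL;
        }
        if (drop_reconnect_ref)
            clnt_unref(clnt);
    }

The unref is only legal when the cancel succeeded; if the timer already fired, the
reconnect callback consumes the ref itself. That is also why the diff below makes
gf_timer_call_cancel() report failure with -1 instead of silently returning 0.
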
+ +Backport of: https://review.gluster.org/#/c/glusterfs/+/22087/ + +>Change-Id: I9d05d1f368d080e04836bf6a0bb018bf8f7b5b8a +>updates: bz#1659708 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I5a2dbb17e663a4809bb4c435cacadbf0ab694a76 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167844 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/timer.c | 16 +++++++---- + rpc/rpc-lib/src/rpc-clnt.c | 11 +++++++- + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 32 ++++++++++++++++++---- + 3 files changed, 47 insertions(+), 12 deletions(-) + +diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c +index d882543..2643c07 100644 +--- a/libglusterfs/src/timer.c ++++ b/libglusterfs/src/timer.c +@@ -75,13 +75,13 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) + if (ctx == NULL || event == NULL) { + gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, + "invalid argument"); +- return 0; ++ return -1; + } + + if (ctx->cleanup_started) { + gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED, + "ctx cleanup started"); +- return 0; ++ return -1; + } + + LOCK(&ctx->lock); +@@ -93,10 +93,9 @@ gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event) + if (!reg) { + /* This can happen when cleanup may have just started and + * gf_timer_registry_destroy() sets ctx->timer to NULL. +- * Just bail out as success as gf_timer_proc() takes +- * care of cleaning up the events. ++ * gf_timer_proc() takes care of cleaning up the events. + */ +- return 0; ++ return -1; + } + + LOCK(®->lock); +@@ -203,6 +202,13 @@ gf_timer_proc(void *data) + list_for_each_entry_safe(event, tmp, ®->active, list) + { + list_del(&event->list); ++ /* TODO Possible resource leak ++ * Before freeing the event, we need to call the respective ++ * event functions and free any resources. ++ * For example, In case of rpc_clnt_reconnect, we need to ++ * unref rpc object which was taken when added to timer ++ * wheel. 
++ */ + GF_FREE(event); + } + } +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index 3f7bb3c..6f47515 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -495,6 +495,7 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) + int unref = 0; + int ret = 0; + gf_boolean_t timer_unref = _gf_false; ++ gf_boolean_t reconnect_unref = _gf_false; + + if (!conn) { + goto out; +@@ -514,6 +515,12 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) + timer_unref = _gf_true; + conn->timer = NULL; + } ++ if (conn->reconnect) { ++ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect); ++ if (!ret) ++ reconnect_unref = _gf_true; ++ conn->reconnect = NULL; ++ } + + conn->connected = 0; + conn->disconnected = 1; +@@ -533,6 +540,8 @@ rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn) + if (timer_unref) + rpc_clnt_unref(clnt); + ++ if (reconnect_unref) ++ rpc_clnt_unref(clnt); + out: + return 0; + } +@@ -830,7 +839,7 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn) + pthread_mutex_lock(&conn->lock); + { + if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { +- ts.tv_sec = 10; ++ ts.tv_sec = 3; + ts.tv_nsec = 0; + + rpc_clnt_ref(clnt); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index 041946d..b3c4158 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -3364,6 +3364,25 @@ out: + return ret; + } + ++int ++glusterd_is_path_mounted(const char *path) ++{ ++ FILE *mtab = NULL; ++ struct mntent *part = NULL; ++ int is_mounted = 0; ++ ++ if ((mtab = setmntent("/etc/mtab", "r")) != NULL) { ++ while ((part = getmntent(mtab)) != NULL) { ++ if ((part->mnt_fsname != NULL) && ++ (strcmp(part->mnt_dir, path)) == 0) { ++ is_mounted = 1; ++ break; ++ } ++ } ++ endmntent(mtab); ++ } ++ return is_mounted; ++} + /* This function will do unmount for snaps. + */ + int32_t +@@ -3388,14 +3407,11 @@ glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo) + continue; + } + +- /* Fetch the brick mount path from the brickinfo->path */ +- ret = glusterd_get_brick_root(brickinfo->path, &brick_mount_path); ++ ret = glusterd_find_brick_mount_path(brickinfo->path, ++ &brick_mount_path); + if (ret) { +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_PATH_UNMOUNTED, ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL, + "Failed to find brick_mount_path for %s", brickinfo->path); +- /* There is chance that brick path is already +- * unmounted. 
*/ +- ret = 0; + goto out; + } + /* unmount cannot be done when the brick process is still in +@@ -3440,6 +3456,10 @@ glusterd_umount(const char *path) + GF_ASSERT(this); + GF_ASSERT(path); + ++ if (!glusterd_is_path_mounted(path)) { ++ return 0; ++ } ++ + runinit(&runner); + snprintf(msg, sizeof(msg), "umount path %s", path); + runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL); +-- +1.8.3.1 + diff --git a/SOURCES/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch b/SOURCES/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch new file mode 100644 index 0000000..f8d0763 --- /dev/null +++ b/SOURCES/0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch @@ -0,0 +1,162 @@ +From 0021a4bbc9af2bfe28d4a79f76c3cd33f23dd118 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Fri, 5 Apr 2019 12:33:55 +0530 +Subject: [PATCH 101/124] shd/mux: Fix coverity issues introduced by shd mux + patch + +CID 1400475: Null pointer dereferences (FORWARD_NULL) +CID 1400474: Null pointer dereferences (FORWARD_NULL) +CID 1400471: Code maintainability issues (UNUSED_VALUE) +CID 1400470: Null pointer dereferences (FORWARD_NULL) +CID 1400469: Memory - illegal accesses (USE_AFTER_FREE) +CID 1400467: Code maintainability issues (UNUSED_VALUE) + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22514/ + +>Change-Id: I0ca1c733be335c6e5844f44850f8066626ac40d4 +>updates: bz#789278 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I0425efca9ab5a95801eff9e99259219449a16380 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167832 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/graph.c | 21 +++++++++++++-------- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 24 +++++++++++++++++------- + 3 files changed, 36 insertions(+), 15 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index a492dd8..4c8b02d 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1470,7 +1470,9 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + goto out; + parent_graph = ctx->active; + graph = volfile_obj->graph; +- if (graph && graph->first) ++ if (!graph) ++ goto out; ++ if (graph->first) + xl = graph->first; + + last_xl = graph->last_xl; +@@ -1591,12 +1593,10 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, + parent_graph->leaf_count += graph->leaf_count; + parent_graph->id++; + ++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); + if (!volfile_obj) { +- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); +- if (!volfile_obj) { +- ret = -1; +- goto out; +- } ++ ret = -1; ++ goto out; + } + + graph->used = 1; +@@ -1641,6 +1641,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + { + glusterfs_graph_t *oldvolfile_graph = NULL; + glusterfs_graph_t *newvolfile_graph = NULL; ++ char vol_id[NAME_MAX + 1]; + + int ret = -1; + +@@ -1672,6 +1673,9 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); + + if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { ++ ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id); ++ if (ret < 0) ++ goto out; + ret = 
glusterfs_process_svc_detach(ctx, volfile_obj); + if (ret) { + gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, +@@ -1680,8 +1684,9 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + "old graph. Aborting the reconfiguration operation"); + goto out; + } +- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, +- volfile_obj->vol_id, checksum); ++ volfile_obj = NULL; ++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id, ++ checksum); + goto out; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 937ea30..04a4b2e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -101,6 +101,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc); + ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s", + mux_conn->sockpath); ++ if (ret < 0) ++ goto out; + } else { + ret = mkdir_p(logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { +@@ -663,6 +665,10 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) + glusterd_volinfo_ref(volinfo); + svc_proc->data = volinfo; + ret = glusterd_svc_stop(svc, sig); ++ if (ret) { ++ glusterd_volinfo_unref(volinfo); ++ goto out; ++ } + } + if (!empty && pid != -1) { + ret = glusterd_detach_svc(svc, volinfo, sig); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index e42703c..02945b1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -411,9 +411,14 @@ __gf_find_compatible_svc(gd_node_type daemon) + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + +- if (daemon == GD_NODE_SHD) { +- svc_procs = &conf->shd_procs; +- if (!svc_procs) ++ switch (daemon) { ++ case GD_NODE_SHD: { ++ svc_procs = &conf->shd_procs; ++ if (!svc_procs) ++ goto out; ++ } break; ++ default: ++ /* Add support for other client daemons here */ + goto out; + } + +@@ -540,11 +545,16 @@ __gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid) + if (!conf) + return NULL; + +- if (daemon == GD_NODE_SHD) { +- svc_procs = &conf->shd_procs; +- if (!svc_proc) ++ switch (daemon) { ++ case GD_NODE_SHD: { ++ svc_procs = &conf->shd_procs; ++ if (!svc_procs) ++ return NULL; ++ } break; ++ default: ++ /* Add support for other client daemons here */ + return NULL; +- } /* Can be moved to switch when mux is implemented for other daemon; */ ++ } + + cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) + { +-- +1.8.3.1 + diff --git a/SOURCES/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch b/SOURCES/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch new file mode 100644 index 0000000..39fe021 --- /dev/null +++ b/SOURCES/0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch @@ -0,0 +1,737 @@ +From df6523ed3c5267624197b52edcb553fc2d8a08f2 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 26 Feb 2019 18:04:18 +0530 +Subject: [PATCH 102/124] rpc/transport: Missing a ref on dict while creating + transport object + +while creating rpc_tranpsort object, we store a dictionary without +taking a ref on dict but it does an unref during the cleaning of the +transport object. + +So the rpc layer expect the caller to take a ref on the dictionary +before passing dict to rpc layer. 
This leads to a lot of confusion +across the code base and leads to ref leaks. + +Semantically, this is not correct. It is the rpc layer responsibility +to take a ref when storing it, and free during the cleanup. + +I'm listing down the total issues or leaks across the code base because +of this confusion. These issues are currently present in the upstream +master. + +1) changelog_rpc_client_init + +2) quota_enforcer_init + +3) rpcsvc_create_listeners : when there are two transport, like tcp,rdma. + +4) quotad_aggregator_init + +5) glusterd: init + +6) nfs3_init_state + +7) server: init + +8) client:init + +This patch does the cleanup according to the semantics. + +Backport of : https://review.gluster.org/#/c/glusterfs/+/22266/ + +>Change-Id: I46373af9630373eb375ee6de0e6f2bbe2a677425 +>updates: bz#1659708 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: Iff978497e11592fbebfa4b683fdc56698b782859 +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167847 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + api/src/glfs-mgmt.c | 10 ++++-- + cli/src/cli.c | 20 +++++++----- + glusterfsd/src/glusterfsd-mgmt.c | 18 ++++++++-- + rpc/rpc-lib/src/rpc-clnt.c | 2 -- + rpc/rpc-lib/src/rpc-transport.c | 38 +++++++--------------- + rpc/rpc-lib/src/rpc-transport.h | 4 +-- + rpc/rpc-lib/src/rpcsvc.c | 13 ++------ + rpc/rpc-lib/src/rpcsvc.h | 2 +- + .../features/changelog/src/changelog-rpc-common.c | 9 +++-- + .../snapview-server/src/snapview-server-mgmt.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-handler.c | 18 ++++++---- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-utils.c | 9 +++-- + xlators/mgmt/glusterd/src/glusterd.c | 6 +++- + xlators/nfs/server/src/acl3.c | 5 +++ + xlators/nfs/server/src/mount3.c | 5 +++ + xlators/nfs/server/src/nlm4.c | 7 ++++ + 18 files changed, 119 insertions(+), 71 deletions(-) + +diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c +index d502b4f..7476d5b 100644 +--- a/api/src/glfs-mgmt.c ++++ b/api/src/glfs-mgmt.c +@@ -1015,6 +1015,10 @@ glfs_mgmt_init(struct glfs *fs) + if (ctx->mgmt) + return 0; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + if (cmd_args->volfile_server_port) + port = cmd_args->volfile_server_port; + +@@ -1029,11 +1033,11 @@ glfs_mgmt_init(struct glfs *fs) + + if (cmd_args->volfile_server_transport && + !strcmp(cmd_args->volfile_server_transport, "unix")) { +- ret = rpc_transport_unix_options_build(&options, host, 0); ++ ret = rpc_transport_unix_options_build(options, host, 0); + } else { + xlator_cmdline_option_t *opt = find_xlator_option_in_cmd_args_t( + "address-family", cmd_args); +- ret = rpc_transport_inet_options_build(&options, host, port, ++ ret = rpc_transport_inet_options_build(options, host, port, + (opt ? 
opt->value : NULL)); + } + +@@ -1075,5 +1079,7 @@ glfs_mgmt_init(struct glfs *fs) + + ret = rpc_clnt_start(rpc); + out: ++ if (options) ++ dict_unref(options); + return ret; + } +diff --git a/cli/src/cli.c b/cli/src/cli.c +index c33d152..ff39a98 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -661,9 +661,8 @@ cli_quotad_clnt_rpc_init(void) + + global_quotad_rpc = rpc; + out: +- if (ret) { +- if (rpc_opts) +- dict_unref(rpc_opts); ++ if (rpc_opts) { ++ dict_unref(rpc_opts); + } + return rpc; + } +@@ -685,6 +684,10 @@ cli_rpc_init(struct cli_state *state) + this = THIS; + cli_rpc_prog = &cli_prog; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + /* If address family specified in CLI */ + if (state->address_family) { + addr_family = state->address_family; +@@ -699,7 +702,7 @@ cli_rpc_init(struct cli_state *state) + "Connecting to glusterd using " + "sockfile %s", + state->glusterd_sock); +- ret = rpc_transport_unix_options_build(&options, state->glusterd_sock, ++ ret = rpc_transport_unix_options_build(options, state->glusterd_sock, + 0); + if (ret) + goto out; +@@ -709,10 +712,6 @@ cli_rpc_init(struct cli_state *state) + "%s", + state->remote_host); + +- options = dict_new(); +- if (!options) +- goto out; +- + ret = dict_set_str(options, "remote-host", state->remote_host); + if (ret) + goto out; +@@ -731,7 +730,7 @@ cli_rpc_init(struct cli_state *state) + gf_log("cli", GF_LOG_DEBUG, + "Connecting to glusterd using " + "default socket"); +- ret = rpc_transport_unix_options_build(&options, ++ ret = rpc_transport_unix_options_build(options, + DEFAULT_GLUSTERD_SOCKFILE, 0); + if (ret) + goto out; +@@ -749,6 +748,9 @@ cli_rpc_init(struct cli_state *state) + + ret = rpc_clnt_start(rpc); + out: ++ if (options) ++ dict_unref(options); ++ + if (ret) { + if (rpc) + rpc_clnt_unref(rpc); +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index a89c980..1d2cd1a 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -2781,7 +2781,11 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx) + if (!cmd_args->sock_file) + return 0; + +- ret = rpcsvc_transport_unix_options_build(&options, cmd_args->sock_file); ++ options = dict_new(); ++ if (!options) ++ goto out; ++ ++ ret = rpcsvc_transport_unix_options_build(options, cmd_args->sock_file); + if (ret) + goto out; + +@@ -2808,6 +2812,8 @@ glusterfs_listener_init(glusterfs_ctx_t *ctx) + ctx->listener = rpc; + + out: ++ if (options) ++ dict_unref(options); + return ret; + } + +@@ -2889,6 +2895,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx) + if (ctx->mgmt) + return 0; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + LOCK_INIT(&ctx->volfile_lock); + + if (cmd_args->volfile_server_port) +@@ -2898,10 +2908,10 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx) + + if (cmd_args->volfile_server_transport && + !strcmp(cmd_args->volfile_server_transport, "unix")) { +- ret = rpc_transport_unix_options_build(&options, host, 0); ++ ret = rpc_transport_unix_options_build(options, host, 0); + } else { + opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); +- ret = rpc_transport_inet_options_build(&options, host, port, ++ ret = rpc_transport_inet_options_build(options, host, port, + (opt ? 
opt->value : NULL)); + } + if (ret) +@@ -2950,6 +2960,8 @@ glusterfs_mgmt_init(glusterfs_ctx_t *ctx) + + ret = rpc_clnt_start(rpc); + out: ++ if (options) ++ dict_unref(options); + return ret; + } + +diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c +index 6f47515..b04eaed 100644 +--- a/rpc/rpc-lib/src/rpc-clnt.c ++++ b/rpc/rpc-lib/src/rpc-clnt.c +@@ -1125,8 +1125,6 @@ rpc_clnt_new(dict_t *options, xlator_t *owner, char *name, + mem_pool_destroy(rpc->saved_frames_pool); + GF_FREE(rpc); + rpc = NULL; +- if (options) +- dict_unref(options); + goto out; + } + +diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c +index 4beaaf9..bed1f8c 100644 +--- a/rpc/rpc-lib/src/rpc-transport.c ++++ b/rpc/rpc-lib/src/rpc-transport.c +@@ -168,6 +168,11 @@ rpc_transport_cleanup(rpc_transport_t *trans) + if (trans->fini) + trans->fini(trans); + ++ if (trans->options) { ++ dict_unref(trans->options); ++ trans->options = NULL; ++ } ++ + GF_FREE(trans->name); + + if (trans->xl) +@@ -352,7 +357,7 @@ rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name) + } + } + +- trans->options = options; ++ trans->options = dict_ref(options); + + pthread_mutex_init(&trans->lock, NULL); + trans->xl = this; +@@ -591,19 +596,14 @@ out: + } + + int +-rpc_transport_unix_options_build(dict_t **options, char *filepath, ++rpc_transport_unix_options_build(dict_t *dict, char *filepath, + int frame_timeout) + { +- dict_t *dict = NULL; + char *fpath = NULL; + int ret = -1; + + GF_ASSERT(filepath); +- GF_ASSERT(options); +- +- dict = dict_new(); +- if (!dict) +- goto out; ++ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out); + + fpath = gf_strdup(filepath); + if (!fpath) { +@@ -638,20 +638,14 @@ rpc_transport_unix_options_build(dict_t **options, char *filepath, + if (ret) + goto out; + } +- +- *options = dict; + out: +- if (ret && dict) { +- dict_unref(dict); +- } + return ret; + } + + int +-rpc_transport_inet_options_build(dict_t **options, const char *hostname, +- int port, char *af) ++rpc_transport_inet_options_build(dict_t *dict, const char *hostname, int port, ++ char *af) + { +- dict_t *dict = NULL; + char *host = NULL; + int ret = -1; + #ifdef IPV6_DEFAULT +@@ -660,13 +654,9 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname, + char *addr_family = "inet"; + #endif + +- GF_ASSERT(options); + GF_ASSERT(hostname); + GF_ASSERT(port >= 1024); +- +- dict = dict_new(); +- if (!dict) +- goto out; ++ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out); + + host = gf_strdup((char *)hostname); + if (!host) { +@@ -702,12 +692,6 @@ rpc_transport_inet_options_build(dict_t **options, const char *hostname, + "failed to set trans-type with socket"); + goto out; + } +- +- *options = dict; + out: +- if (ret && dict) { +- dict_unref(dict); +- } +- + return ret; + } +diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h +index 9e75d1a..64b7e9b 100644 +--- a/rpc/rpc-lib/src/rpc-transport.h ++++ b/rpc/rpc-lib/src/rpc-transport.h +@@ -303,11 +303,11 @@ rpc_transport_keepalive_options_set(dict_t *options, int32_t interval, + int32_t time, int32_t timeout); + + int +-rpc_transport_unix_options_build(dict_t **options, char *filepath, ++rpc_transport_unix_options_build(dict_t *options, char *filepath, + int frame_timeout); + + int +-rpc_transport_inet_options_build(dict_t **options, const char *hostname, ++rpc_transport_inet_options_build(dict_t *options, const char *hostname, + int port, char *af); + + void +diff --git a/rpc/rpc-lib/src/rpcsvc.c 
b/rpc/rpc-lib/src/rpcsvc.c +index 74373c4..5a35139 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -2615,18 +2615,13 @@ rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options) + } + + int +-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath) ++rpcsvc_transport_unix_options_build(dict_t *dict, char *filepath) + { +- dict_t *dict = NULL; + char *fpath = NULL; + int ret = -1; + + GF_ASSERT(filepath); +- GF_ASSERT(options); +- +- dict = dict_new(); +- if (!dict) +- goto out; ++ GF_VALIDATE_OR_GOTO("rpcsvc", dict, out); + + fpath = gf_strdup(filepath); + if (!fpath) { +@@ -2649,13 +2644,9 @@ rpcsvc_transport_unix_options_build(dict_t **options, char *filepath) + ret = dict_set_str(dict, "transport-type", "socket"); + if (ret) + goto out; +- +- *options = dict; + out: + if (ret) { + GF_FREE(fpath); +- if (dict) +- dict_unref(dict); + } + return ret; + } +diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h +index 34045ce..a51edc7 100644 +--- a/rpc/rpc-lib/src/rpcsvc.h ++++ b/rpc/rpc-lib/src/rpcsvc.h +@@ -665,7 +665,7 @@ rpcsvc_actor_t * + rpcsvc_program_actor(rpcsvc_request_t *req); + + int +-rpcsvc_transport_unix_options_build(dict_t **options, char *filepath); ++rpcsvc_transport_unix_options_build(dict_t *options, char *filepath); + int + rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options); + int +diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c +index cf35175..dcdcfb1 100644 +--- a/xlators/features/changelog/src/changelog-rpc-common.c ++++ b/xlators/features/changelog/src/changelog-rpc-common.c +@@ -47,7 +47,7 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, + if (!options) + goto error_return; + +- ret = rpc_transport_unix_options_build(&options, sockfile, 0); ++ ret = rpc_transport_unix_options_build(options, sockfile, 0); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR, + "failed to build rpc options"); +@@ -73,6 +73,7 @@ changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, + goto dealloc_rpc_clnt; + } + ++ dict_unref(options); + return rpc; + + dealloc_rpc_clnt: +@@ -303,7 +304,11 @@ changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata, + if (!cbkdata) + cbkdata = this; + +- ret = rpcsvc_transport_unix_options_build(&options, sockfile); ++ options = dict_new(); ++ if (!options) ++ return NULL; ++ ++ ret = rpcsvc_transport_unix_options_build(options, sockfile); + if (ret) + goto dealloc_dict; + +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index b608cdf..bc415ef 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -101,8 +101,12 @@ svs_mgmt_init(xlator_t *this) + if (cmd_args->volfile_server) + host = cmd_args->volfile_server; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args); +- ret = rpc_transport_inet_options_build(&options, host, port, ++ ret = rpc_transport_inet_options_build(options, host, port, + (opt != NULL ? 
opt->value : NULL)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED, +@@ -145,6 +149,8 @@ svs_mgmt_init(xlator_t *this) + gf_msg_debug(this->name, 0, "svs mgmt init successful"); + + out: ++ if (options) ++ dict_unref(options); + if (ret) + if (priv) { + rpc_clnt_connection_cleanup(&priv->rpc->conn); +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +index 052438c..16eefa1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +@@ -29,6 +29,10 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + if (!this) + goto out; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + svc = glusterd_conn_get_svc_object(conn); + if (!svc) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, +@@ -36,7 +40,7 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + goto out; + } + +- ret = rpc_transport_unix_options_build(&options, sockpath, frame_timeout); ++ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout); + if (ret) + goto out; + +@@ -66,6 +70,8 @@ glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout, + conn->rpc = rpc; + conn->notify = notify; + out: ++ if (options) ++ dict_unref(options); + if (ret) { + if (rpc) { + rpc_clnt_unref(rpc); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 1cb9013..6147995 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3493,11 +3493,10 @@ out: + } + + int +-glusterd_transport_inet_options_build(dict_t **options, const char *hostname, ++glusterd_transport_inet_options_build(dict_t *dict, const char *hostname, + int port, char *af) + { + xlator_t *this = NULL; +- dict_t *dict = NULL; + int32_t interval = -1; + int32_t time = -1; + int32_t timeout = -1; +@@ -3505,14 +3504,14 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname, + + this = THIS; + GF_ASSERT(this); +- GF_ASSERT(options); ++ GF_ASSERT(dict); + GF_ASSERT(hostname); + + if (!port) + port = GLUSTERD_DEFAULT_PORT; + + /* Build default transport options */ +- ret = rpc_transport_inet_options_build(&dict, hostname, port, af); ++ ret = rpc_transport_inet_options_build(dict, hostname, port, af); + if (ret) + goto out; + +@@ -3552,7 +3551,6 @@ glusterd_transport_inet_options_build(dict_t **options, const char *hostname, + if ((interval > 0) || (time > 0)) + ret = rpc_transport_keepalive_options_set(dict, interval, time, + timeout); +- *options = dict; + out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; +@@ -3572,6 +3570,10 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + if (!peerctx) + goto out; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + if (args) + peerctx->args = *args; + +@@ -3586,7 +3588,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + if (ret) + gf_log(this->name, GF_LOG_TRACE, + "option transport.address-family is not set in xlator options"); +- ret = glusterd_transport_inet_options_build(&options, peerinfo->hostname, ++ ret = glusterd_transport_inet_options_build(options, peerinfo->hostname, + peerinfo->port, af); + if (ret) + goto out; +@@ -3596,6 +3598,7 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + * create our RPC endpoint with the same address that 
the peer would + * use to reach us. + */ ++ + if (this->options) { + data = dict_getn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address")); +@@ -3637,6 +3640,9 @@ glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo, + peerctx = NULL; + ret = 0; + out: ++ if (options) ++ dict_unref(options); ++ + GF_FREE(peerctx); + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index ed5ded5..cbed9a9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -391,6 +391,10 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + if (!defrag) + goto out; + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); + /* Check if defrag sockfile exists in the new location + * in /var/run/ , if it does not try the old location +@@ -420,7 +424,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. + */ +- ret = rpc_transport_unix_options_build(&options, sockfile, 600); ++ ret = rpc_transport_unix_options_build(options, sockfile, 600); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL, + "Unix options build failed"); +@@ -437,6 +441,8 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + } + ret = 0; + out: ++ if (options) ++ dict_unref(options); + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ef664c2..2dd5f91 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1980,7 +1980,11 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo, + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. + */ +- ret = rpc_transport_unix_options_build(&options, socketpath, 600); ++ options = dict_new(); ++ if (!options) ++ goto out; ++ ++ ret = rpc_transport_unix_options_build(options, socketpath, 600); + if (ret) + goto out; + +@@ -1999,7 +2003,8 @@ glusterd_brick_connect(glusterd_volinfo_t *volinfo, + brickinfo->rpc = rpc; + } + out: +- ++ if (options) ++ dict_unref(options); + gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index 89afb9c..d4ab630 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1111,11 +1111,15 @@ glusterd_init_uds_listener(xlator_t *this) + + GF_ASSERT(this); + ++ options = dict_new(); ++ if (!options) ++ goto out; ++ + sock_data = dict_get(this->options, "glusterd-sockfile"); + (void)snprintf(sockfile, sizeof(sockfile), "%s", + sock_data ? 
sock_data->data : DEFAULT_GLUSTERD_SOCKFILE); + +- ret = rpcsvc_transport_unix_options_build(&options, sockfile); ++ ret = rpcsvc_transport_unix_options_build(options, sockfile); + if (ret) + goto out; + +diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c +index 0eca45d..2ede24b 100644 +--- a/xlators/nfs/server/src/acl3.c ++++ b/xlators/nfs/server/src/acl3.c +@@ -787,9 +787,14 @@ acl3svc_init(xlator_t *nfsx) + goto err; + } + ++ if (options) ++ dict_unref(options); ++ + acl3_inited = _gf_true; + return &acl3prog; + err: ++ if (options) ++ dict_unref(options); + return NULL; + } + +diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c +index 726dc29..396809c 100644 +--- a/xlators/nfs/server/src/mount3.c ++++ b/xlators/nfs/server/src/mount3.c +@@ -4102,8 +4102,13 @@ mnt3svc_init(xlator_t *nfsx) + gf_msg_debug(GF_MNT, GF_LOG_DEBUG, "Thread creation failed"); + } + } ++ if (options) ++ dict_unref(options); ++ + return &mnt3prog; + err: ++ if (options) ++ dict_unref(options); + return NULL; + } + +diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c +index a341ebd..c3c1453 100644 +--- a/xlators/nfs/server/src/nlm4.c ++++ b/xlators/nfs/server/src/nlm4.c +@@ -1121,6 +1121,8 @@ nlm4_establish_callback(nfs3_call_state_t *cs, call_frame_t *cbk_frame) + ret = 0; + + err: ++ if (options) ++ dict_unref(options); + if (ret == -1) { + if (rpc_clnt) + rpc_clnt_unref(rpc_clnt); +@@ -2708,8 +2710,13 @@ nlm4svc_init(xlator_t *nfsx) + + gf_timer_call_after(nfsx->ctx, timeout, nlm_grace_period_over, NULL); + nlm4_inited = _gf_true; ++ ++ if (options) ++ dict_unref(options); + return &nlm4prog; + err: ++ if (options) ++ dict_unref(options); + return NULL; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0103-dht-NULL-check-before-setting-error-flag.patch b/SOURCES/0103-dht-NULL-check-before-setting-error-flag.patch new file mode 100644 index 0000000..addd4f7 --- /dev/null +++ b/SOURCES/0103-dht-NULL-check-before-setting-error-flag.patch @@ -0,0 +1,43 @@ +From 45c9eeb5544738d4d1d0aefb8a7f61e5d8859ad8 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 12 Mar 2019 18:00:37 +0530 +Subject: [PATCH 103/124] dht: NULL check before setting error flag + +Function dht_common_mark_mdsxattr blindly setting value for +an integer pointer without validating it. In fact there are +two callers of this function that passes NULL value to the +same pointer which leads to a crash. 
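The fix is the standard defensive pattern for optional out-parameters. A minimal
sketch with hypothetical names (find_hashed_subvol_sketch is not the dht API):

    #include <stddef.h>

    extern void *find_hashed_subvol_sketch(const char *path); /* hypothetical */

    /* "errst" mirrors dht's optional error flag: some callers legitimately
     * pass NULL, so every store through it must be guarded. */
    static int
    mark_mdsxattr_sketch(const char *path, int *errst)
    {
        void *subvol = find_hashed_subvol_sketch(path);

        if (!subvol) {
            if (errst) /* the missing check that caused the crash */
                *errst = 1;
            return -1;
        }
        return 0;
    }
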
+ +Backport of : https://review.gluster.org/#/c/22345/ + +>Change-Id: Id94ffe216f6a21f007b3291bff0b1e1c1989075c +>fixes: bz#1687811 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: Id9785c16184fd80e8184e5ae135fb63bf44692cd +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167846 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 367548f..2a68193 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -852,7 +852,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + "Failed to get hashed subvol for path %s" + "gfid is %s ", + local->loc.path, gfid_local); +- (*errst) = 1; ++ if (errst) ++ (*errst) = 1; + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch b/SOURCES/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch new file mode 100644 index 0000000..214ccb4 --- /dev/null +++ b/SOURCES/0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch @@ -0,0 +1,151 @@ +From faaaa3452ceec6afcc18cffc9beca3fe19841cce Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 3 Jan 2019 17:44:18 +0530 +Subject: [PATCH 104/124] afr/shd: Cleanup self heal daemon resources during + afr fini + +We were not properly cleaning self-heal daemon resources +during afr fini. This patch will clean the same. + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22151/ + +>Change-Id: I597860be6f781b195449e695d871b8667a418d5a +>updates: bz#1659708 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I7be981b9c2476c8cacadea6b14d74234f67b714f +BUG: 1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167845 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/syncop-utils.c | 8 +++++ + xlators/cluster/afr/src/afr-self-heald.c | 2 ++ + xlators/cluster/afr/src/afr.c | 57 ++++++++++++++++++++++++++++++++ + 3 files changed, 67 insertions(+) + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index be03527..b842142 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -350,6 +350,11 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + gf_boolean_t cond_init = _gf_false; + gf_boolean_t mut_init = _gf_false; + gf_dirent_t entries; ++ xlator_t *this = NULL; ++ ++ if (frame) { ++ this = frame->this; ++ } + + /*For this functionality to be implemented in general, we need + * synccond_t infra which doesn't block the executing thread. 
Until then +@@ -397,6 +402,9 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { ++ if (this && this->cleanup_starting) ++ goto out; ++ + list_del_init(&entry->list); + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) { + gf_dirent_entry_free(entry); +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 7eb1207..8bc4720 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -373,6 +373,7 @@ afr_shd_sweep_prepare(struct subvol_healer *healer) + + time(&event->start_time); + event->end_time = 0; ++ _mask_cancellation(); + } + + void +@@ -394,6 +395,7 @@ afr_shd_sweep_done(struct subvol_healer *healer) + + if (eh_save_history(shd->statistics[healer->subvol], history) < 0) + GF_FREE(history); ++ _unmask_cancellation(); + } + + int +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 33258a0..a0a7551 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -611,13 +611,70 @@ init(xlator_t *this) + out: + return ret; + } ++void ++afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer) ++{ ++ int ret = -1; ++ ++ if (!healer) ++ return; ++ ++ if (healer->running) { ++ /* ++ * If there are any resources to cleanup, We need ++ * to do that gracefully using pthread_cleanup_push ++ */ ++ ret = gf_thread_cleanup_xint(healer->thread); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED, ++ "Failed to clean up healer threads."); ++ healer->thread = 0; ++ } ++ pthread_cond_destroy(&healer->cond); ++ pthread_mutex_destroy(&healer->mutex); ++} ++ ++void ++afr_selfheal_daemon_fini(xlator_t *this) ++{ ++ struct subvol_healer *healer = NULL; ++ afr_self_heald_t *shd = NULL; ++ afr_private_t *priv = NULL; ++ int i = 0; ++ ++ priv = this->private; ++ if (!priv) ++ return; ++ ++ shd = &priv->shd; ++ if (!shd->iamshd) ++ return; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ healer = &shd->index_healers[i]; ++ afr_destroy_healer_object(this, healer); + ++ healer = &shd->full_healers[i]; ++ afr_destroy_healer_object(this, healer); ++ ++ if (shd->statistics[i]) ++ eh_destroy(shd->statistics[i]); ++ } ++ GF_FREE(shd->index_healers); ++ GF_FREE(shd->full_healers); ++ GF_FREE(shd->statistics); ++ if (shd->split_brain) ++ eh_destroy(shd->split_brain); ++} + void + fini(xlator_t *this) + { + afr_private_t *priv = NULL; + + priv = this->private; ++ ++ afr_selfheal_daemon_fini(this); ++ + LOCK(&priv->lock); + if (priv->timer != NULL) { + gf_timer_call_cancel(this->ctx, priv->timer); +-- +1.8.3.1 + diff --git a/SOURCES/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch b/SOURCES/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch new file mode 100644 index 0000000..a735794 --- /dev/null +++ b/SOURCES/0105-core-Log-level-changes-do-not-effect-on-running-clie.patch @@ -0,0 +1,336 @@ +From 023854d5573211d4737eb0ebe7ec954a7b7bb4ee Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Mon, 15 Apr 2019 10:34:34 +0530 +Subject: [PATCH 105/124] core: Log level changes do not effect on running + client process + +Problem: commit c34e4161f3cb6539ec83a9020f3d27eb4759a975 set log-level + per xlator during reconfigure only for a brick process not for + the client process. 
+ +Solution: 1) Change per xlator log-level only if brick_mux is enabled.To make sure + about brick multiplex introudce a flag brick_mux at ctx->cmd_args. + +Note: There are two other changes done with this patch + 1) Ignore client-log-level option to attach a brick with + already running brick if brick_mux is enabled + 2) Add a log to print pid of the running process to make easier + debugging + +> Change-Id: I39e85de778e150d0685cd9a79425ce8b4783f9c9 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> Fixes: bz#1696046 +> (Cherry picked from commit 798aadbe51a9a02dd98a0f861cc239ecf7c8ed57) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22495/) + +Change-Id: If82cc8e51cf00bd50d3321d31ec420f89786ea02 +Fixes: bz#1695081 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167828 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfsd/src/glusterfsd-messages.h | 2 +- + glusterfsd/src/glusterfsd.c | 20 ++++- + glusterfsd/src/glusterfsd.h | 1 + + libglusterfs/src/glusterfs/glusterfs.h | 1 + + tests/bugs/glusterd/bug-1696046.t | 113 +++++++++++++++++++++++++++++ + xlators/debug/io-stats/src/io-stats.c | 22 +++--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 7 ++ + 7 files changed, 152 insertions(+), 14 deletions(-) + create mode 100644 tests/bugs/glusterd/bug-1696046.t + +diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h +index 94312a5..280624c 100644 +--- a/glusterfsd/src/glusterfsd-messages.h ++++ b/glusterfsd/src/glusterfsd-messages.h +@@ -36,6 +36,6 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3, + glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33, + glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36, + glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39, +- glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42); ++ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43); + + #endif /* !_GLUSTERFSD_MESSAGES_H_ */ +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 3aa89ca..6aee4c1 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -85,8 +85,7 @@ static char gf_doc[] = ""; + static char argp_doc[] = + "--volfile-server=SERVER [MOUNT-POINT]\n" + "--volfile=VOLFILE [MOUNT-POINT]"; +-const char *argp_program_version = +- PACKAGE_NAME" "PACKAGE_VERSION; ++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION; + const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">"; + + static error_t +@@ -266,6 +265,7 @@ static struct argp_option gf_options[] = { + "attribute, dentry and page-cache. " + "Disable this only if same files/directories are not accessed across " + "two different mounts concurrently [default: \"on\"]"}, ++ {"brick-mux", ARGP_BRICK_MUX_KEY, 0, 0, "Enable brick mux. 
"}, + {0, 0, 0, 0, "Miscellaneous Options:"}, + { + 0, +@@ -702,7 +702,6 @@ create_fuse_mount(glusterfs_ctx_t *ctx) + xlator_t *master = NULL; + + cmd_args = &ctx->cmd_args; +- + if (!cmd_args->mount_point) { + gf_msg_trace("glusterfsd", 0, + "mount point not found, not a client process"); +@@ -1090,6 +1089,10 @@ parse_opts(int key, char *arg, struct argp_state *state) + cmd_args->thin_client = _gf_true; + break; + ++ case ARGP_BRICK_MUX_KEY: ++ cmd_args->brick_mux = _gf_true; ++ break; ++ + case ARGP_PID_FILE_KEY: + cmd_args->pid_file = gf_strdup(arg); + break; +@@ -1207,7 +1210,6 @@ parse_opts(int key, char *arg, struct argp_state *state) + case ARGP_KEY_ARG: + if (state->arg_num >= 1) + argp_usage(state); +- + cmd_args->mount_point = gf_strdup(arg); + break; + +@@ -2540,6 +2542,8 @@ postfork: + if (ret) + goto out; + } ++ gf_log("glusterfs", GF_LOG_INFO, "Pid of current running process is %d", ++ getpid()); + ret = gf_log_inject_timer_event(ctx); + + glusterfs_signals_setup(ctx); +@@ -2787,6 +2791,14 @@ main(int argc, char *argv[]) + if (ret) + goto out; + ++ /* set brick_mux mode only for server process */ ++ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) { ++ gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43, ++ "command line argument --brick-mux is valid only for brick " ++ "process"); ++ goto out; ++ } ++ + /* log the version of glusterfs running here along with the actual + command line options. */ + { +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index 35cf6d8..fa55789 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -111,6 +111,7 @@ enum argp_option_keys { + ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, + ARGP_FUSE_LRU_LIMIT_KEY = 190, + ARGP_FUSE_AUTO_INVAL_KEY = 191, ++ ARGP_BRICK_MUX_KEY = 192 + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index deec5ba..fb727fc 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -575,6 +575,7 @@ struct _cmd_args { + + int fuse_flush_handle_interrupt; + int fuse_auto_inval; ++ bool brick_mux; + }; + typedef struct _cmd_args cmd_args_t; + +diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t +new file mode 100644 +index 0000000..e1c1eb2 +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1696046.t +@@ -0,0 +1,113 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++function count_up_bricks { ++ $CLI --xml volume status $1 | grep '<status>1' | wc -l ++} ++ ++function count_brick_processes { ++ pgrep glusterfsd | wc -l ++} ++ ++logdir=`gluster --print-logdir` ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume set all cluster.brick-multiplex on ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3}; ++TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3}; ++ ++## Start volume and verify ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $CLI volume start $V1; ++EXPECT 'Started' volinfo_field $V1 'Status'; ++ ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V1 ++ ++EXPECT 1 count_brick_processes ++ ++# Mount V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++function client-log-file-name() ++{ ++ logfilename=$M0".log" ++ echo ${logfilename:1} | tr / - ++} ++ ++function brick-log-file-name() ++{ ++ logfilename=$B0"/"$V0"1.log" ++ echo ${logfilename:1} | tr / - ++} ++ ++log_file=$logdir"/"`client-log-file-name` ++nofdlog=$(cat $log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++brick_log_file=$logdir"/bricks/"`brick-log-file-name` ++nofdlog=$(cat $brick_log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++## Set brick-log-level to DEBUG ++TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG ++ ++# Do some operation ++touch $M0/file1 ++ ++# Check debug message debug message should be exist only for V0 ++# Server xlator is common in brick_mux so after enabling DEBUG log ++# some debug message should be available for other xlators like posix ++ ++brick_log_file=$logdir"/bricks/"`brick-log-file-name` ++nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l) ++TEST [ $((nofdlog)) -ne 0 ] ++ ++#Check if any debug log exist in client-log file ++nofdlog=$(cat $log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++## Set brick-log-level to INFO ++TEST $CLI volume set $V0 diagnostics.brick-log-level INFO ++ ++## Set client-log-level to DEBUG ++TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG ++ ++# Do some operation ++touch $M0/file2 ++ ++nofdlog=$(cat $brick_log_file | grep " D " | grep file2 | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++nofdlog=$(cat $log_file | grep " D " | wc -l) ++TEST [ $((nofdlog)) -ne 0 ] ++ ++# Unmount V0 ++TEST umount $M0 ++ ++#Mount V1 ++TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++#do some operation ++touch $M0/file3 ++ ++ ++# DEBUG log level is enabled only for V0 so no debug message should be available ++# in log specific to file2 creation except for server xlator, server xlator is ++# common xlator in brick mulitplex ++nofdlog=$(cat $brick_log_file | grep file3 | grep -v server | wc -l) ++TEST [ $((nofdlog)) -eq 0 ] ++ ++# Unmount V1 ++TEST umount $M0 ++ ++cleanup; +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index 41b57c5..aa91a0a 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3704,19 +3704,23 @@ xlator_set_loglevel(xlator_t *this, int log_level) + active = ctx->active; + top = active->first; + +- if (strcmp(top->type, "protocol/server") || (log_level == -1)) ++ if (log_level == -1) + return; + +- /* Set log-level for server xlator */ +- top->loglevel = log_level; ++ if (ctx->cmd_args.brick_mux) 
{ ++ /* Set log-level for all brick xlators */ ++ top->loglevel = log_level; + +- /* Set log-level for parent xlator */ +- if (this->parents) +- this->parents->xlator->loglevel = log_level; ++ /* Set log-level for parent xlator */ ++ if (this->parents) ++ this->parents->xlator->loglevel = log_level; + +- while (trav) { +- trav->loglevel = log_level; +- trav = trav->next; ++ while (trav) { ++ trav->loglevel = log_level; ++ trav = trav->next; ++ } ++ } else { ++ gf_log_set_loglevel(this->ctx, log_level); + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2dd5f91..fdd7d91 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2240,6 +2240,9 @@ retry: + if (volinfo->memory_accounting) + runner_add_arg(&runner, "--mem-accounting"); + ++ if (is_brick_mx_enabled()) ++ runner_add_arg(&runner, "--brick-mux"); ++ + runner_log(&runner, "", 0, "Starting GlusterFS"); + + brickinfo->port = port; +@@ -2378,6 +2381,10 @@ unsafe_option(dict_t *this, char *key, data_t *value, void *arg) + return _gf_false; + } + ++ if (fnmatch("*diagnostics.client-log*", key, 0) == 0) { ++ return _gf_false; ++ } ++ + return _gf_true; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch b/SOURCES/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch new file mode 100644 index 0000000..6788ba8 --- /dev/null +++ b/SOURCES/0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch @@ -0,0 +1,111 @@ +From 55d945603bb52f0787c5200118673d6206ec3492 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Fri, 12 Apr 2019 19:55:10 +0530 +Subject: [PATCH 106/124] libgfchangelog : use find_library to locate shared + library + +Issue: + +libgfchangelog.so: cannot open shared object file + +Due to hardcoded shared library name runtime loader looks for particular version of +a shared library. + +Solution: + +Using find_library to locate shared library at runtime solves this issue. 
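A minimal sketch of the approach in Python, the language of this patch; the explicit
None check and error message are illustrative additions, not part of the patch. The
traceback below shows the failure this avoids:

    from ctypes import CDLL, RTLD_GLOBAL
    from ctypes.util import find_library

    # find_library() asks the platform linker for the installed, versioned
    # soname (e.g. libgfchangelog.so.0) instead of hard-coding the
    # unversioned "libgfchangelog.so", which typically only -devel
    # packages provide.
    name = find_library("gfchangelog")
    if name is None:
        raise OSError("libgfchangelog not found")
    libgfc = CDLL(name, mode=RTLD_GLOBAL, use_errno=True)
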
+ +Traceback (most recent call last): + File "/usr/libexec/glusterfs/python/syncdaemon/gsyncd.py", line 323, in main + func(args) + File "/usr/libexec/glusterfs/python/syncdaemon/subcmds.py", line 82, in subcmd_worker + local.service_loop(remote) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 1261, in service_loop + changelog_agent.init() + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 233, in __call__ + return self.ins(self.meth, *a) + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 215, in __call__ + raise res +OSError: libgfchangelog.so: cannot open shared object file: No such file or directory + +>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22557/ +>Change-Id: I3dd013d701ed1cd99ba7ef20d1898f343e1db8f5 +>fixes: bz#1699394 +>Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +fixes: bz#1699271 +Change-Id: If8b5827cdac658eb3a211109bd397db9a6fee8e6 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167907 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/syncdaemon/libgfchangelog.py | 3 ++- + tools/glusterfind/src/libgfchangelog.py | 7 +++---- + xlators/features/changelog/lib/examples/python/libgfchangelog.py | 4 +++- + 3 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py +index fff9d24..8d12956 100644 +--- a/geo-replication/syncdaemon/libgfchangelog.py ++++ b/geo-replication/syncdaemon/libgfchangelog.py +@@ -10,13 +10,14 @@ + + import os + from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong ++from ctypes.util import find_library + from syncdutils import ChangelogException, ChangelogHistoryNotAvailable + from py2py3 import gr_cl_history_changelog, gr_cl_done, gr_create_string_buffer + from py2py3 import gr_cl_register, gr_cl_history_done, bytearray_to_str + + + class Changes(object): +- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, ++ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, + use_errno=True) + + @classmethod +diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py +index 1ef177a..513bb10 100644 +--- a/tools/glusterfind/src/libgfchangelog.py ++++ b/tools/glusterfind/src/libgfchangelog.py +@@ -9,8 +9,8 @@ + # cases as published by the Free Software Foundation. 
+ + import os +-from ctypes import CDLL, get_errno, create_string_buffer, c_ulong, byref +-from ctypes import RTLD_GLOBAL ++from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref ++from ctypes.util import find_library + from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer + from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register + from gfind_py2py3 import gfind_history_changelog_done +@@ -19,8 +19,7 @@ from gfind_py2py3 import gfind_history_changelog_done + class ChangelogException(OSError): + pass + +- +-libgfc = CDLL("libgfchangelog.so", use_errno=True, mode=RTLD_GLOBAL) ++libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True) + + + def raise_oserr(prefix=None): +diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py +index 2cdbf11..2da9f2d 100644 +--- a/xlators/features/changelog/lib/examples/python/libgfchangelog.py ++++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py +@@ -1,8 +1,10 @@ + import os + from ctypes import * ++from ctypes.util import find_library + + class Changes(object): +- libgfc = CDLL("libgfchangelog.so", mode=RTLD_GLOBAL, use_errno=True) ++ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, ++ use_errno=True) + + @classmethod + def geterrno(cls): +-- +1.8.3.1 + diff --git a/SOURCES/0107-gfapi-add-function-to-set-client-pid.patch b/SOURCES/0107-gfapi-add-function-to-set-client-pid.patch new file mode 100644 index 0000000..741f2f3 --- /dev/null +++ b/SOURCES/0107-gfapi-add-function-to-set-client-pid.patch @@ -0,0 +1,93 @@ +From 799a74e5e8123cd2e67e9ed5c0f986630a8e0547 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 14 Mar 2019 18:41:11 +0530 +Subject: [PATCH 107/124] gfapi: add function to set client-pid + +This api offers the ability to set the pid of a client to a particular +value, identical to how gluster fuse clients provide the --client-pid +option. This is an internal API to be used by gluster processes only. See +https://lists.gluster.org/pipermail/gluster-devel/2019-March/055925.html +for more details. Currently glfsheal is the only proposed consumer. + +Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22368/ +Change-Id: I0620be2127d79d69cdd57cffb29bba44e6e5da1f +BUG 1676495 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166459 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + api/src/gfapi.aliases | 1 + + api/src/gfapi.map | 4 ++++ + api/src/glfs-internal.h | 6 ++++++ + api/src/glfs.c | 15 +++++++++++++++ + 4 files changed, 26 insertions(+) + +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 25e2d74..09c0fd8 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -172,6 +172,7 @@ _pub_glfs_upcall_lease_get_lease_type _glfs_upcall_lease_get_lease_type$GFAPI_4. 
+ + _priv_glfs_statx _glfs_statx$GFAPI_6.0 + _priv_glfs_iatt_from_statx _glfs_iatt_from_statx$GFAPI_6.0 ++_priv_glfs_setfspid _glfs_setfspid$GFAPI_6.1 + + _pub_glfs_read_async _glfs_read_async$GFAPI_6.0 + _pub_glfs_write_async _glfs_write_async$GFAPI_6.0 +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index bb201c7..b97a614 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -267,3 +267,7 @@ GFAPI_6.0 { + glfs_fsetattr; + } GFAPI_PRIVATE_6.0; + ++GFAPI_PRIVATE_6.1 { ++ global: ++ glfs_setfspid; ++} GFAPI_6.0; +diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h +index 40bbb8a..55401b2 100644 +--- a/api/src/glfs-internal.h ++++ b/api/src/glfs-internal.h +@@ -702,4 +702,10 @@ void + glfs_iatt_from_statx(struct iatt *, const struct glfs_stat *) + GFAPI_PRIVATE(glfs_iatt_from_statx, 6.0); + ++/* ++ * This API is a per thread setting, similar to glfs_setfs{u/g}id, because of ++ * the call to syncopctx_setfspid. ++ */ ++int ++glfs_setfspid(struct glfs *, pid_t) GFAPI_PRIVATE(glfs_setfspid, 6.1); + #endif /* !_GLFS_INTERNAL_H */ +diff --git a/api/src/glfs.c b/api/src/glfs.c +index b741f6e..f4a8e08 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -1461,6 +1461,21 @@ invalid_fs: + + GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0); + ++int ++priv_glfs_setfspid(struct glfs *fs, pid_t pid) ++{ ++ cmd_args_t *cmd_args = NULL; ++ int ret = 0; ++ ++ cmd_args = &fs->ctx->cmd_args; ++ cmd_args->client_pid = pid; ++ cmd_args->client_pid_set = 1; ++ ret = syncopctx_setfspid(&pid); ++ ++ return ret; ++} ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1); ++ + void + pub_glfs_free(void *ptr) + { +-- +1.8.3.1 + diff --git a/SOURCES/0108-afr-add-client-pid-to-all-gf_event-calls.patch b/SOURCES/0108-afr-add-client-pid-to-all-gf_event-calls.patch new file mode 100644 index 0000000..eda9dd9 --- /dev/null +++ b/SOURCES/0108-afr-add-client-pid-to-all-gf_event-calls.patch @@ -0,0 +1,225 @@ +From ba1460a4fee0c41c7d7f7a2043bae37f7e751259 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Fri, 15 Mar 2019 19:31:03 +0530 +Subject: [PATCH 108/124] afr: add client-pid to all gf_event() calls + +client-pid for glustershd is GF_CLIENT_PID_SELF_HEALD +client-pid for glfsheal is GF_CLIENT_PID_GLFS_HEALD + +Patch on upstream master: https://review.gluster.org/#/c/glusterfs/+/22369/ +BUG: 1676495 +Change-Id: Ib3a863af160ff48c822a5e6b0c27c575c9887470 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166460 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + heal/src/glfs-heal.c | 6 ++++++ + xlators/cluster/afr/src/afr-common.c | 12 ++++++++---- + xlators/cluster/afr/src/afr-self-heal-common.c | 11 +++++++---- + xlators/cluster/afr/src/afr-self-heal-data.c | 4 +++- + xlators/cluster/afr/src/afr-self-heal-entry.c | 5 +++-- + xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 +++- + xlators/cluster/afr/src/afr-self-heal-name.c | 7 ++++--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 10 ++++++++++ + 8 files changed, 44 insertions(+), 15 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 6030de3..7e37e47 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -1688,6 +1688,12 @@ main(int argc, char **argv) + goto out; + } + ++ ret = glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL); ++ if (ret) { ++ printf("Setting client pid failed, %s\n", strerror(errno)); ++ goto out; ++ } ++ + ret = glfs_init(fs); + if (ret < 0) { + ret = 
-errno; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 47a5d3a..3690b84 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5233,7 +5233,8 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator, + "Subvolume '%s' came back up; " + "going online.", + child_xlator->name); +- gf_event(EVENT_AFR_SUBVOL_UP, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } else { + *event = GF_EVENT_SOME_DESCENDENT_UP; + } +@@ -5310,7 +5311,8 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx, + "All subvolumes are down. Going " + "offline until at least one of them " + "comes back up."); +- gf_event(EVENT_AFR_SUBVOLS_DOWN, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } else { + *event = GF_EVENT_SOME_DESCENDENT_DOWN; + } +@@ -5585,12 +5587,14 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) + if (!had_quorum && has_quorum) { + gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET, + "Client-quorum is met"); +- gf_event(EVENT_AFR_QUORUM_MET, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } + if (had_quorum && !has_quorum) { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL, + "Client-quorum is not met"); +- gf_event(EVENT_AFR_QUORUM_FAIL, "subvol=%s", this->name); ++ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s", ++ this->ctx->cmd_args.client_pid, this->name); + } + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 2268761..595bed4 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -383,11 +383,12 @@ out: + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2), + priv->children[src_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=gfid;file=" + "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" + "child-%d=%s;gfid-%d=%s", +- this->name, uuid_utoa(pargfid), bname, child_idx, +- priv->children[child_idx]->name, child_idx, ++ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid), ++ bname, child_idx, priv->children[child_idx]->name, child_idx, + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, + priv->children[src_idx]->name, src_idx, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); +@@ -2296,11 +2297,13 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + priv->children[i]->name, + uuid_utoa(replies[i].poststat.ia_gfid)); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;" + "type=file;gfid=%s;" + "ia_type-%d=%s;ia_type-%d=%s", +- this->name, uuid_utoa(replies[i].poststat.ia_gfid), +- first_idx, gf_inode_type_to_str(first.ia_type), i, ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(replies[i].poststat.ia_gfid), first_idx, ++ gf_inode_type_to_str(first.ia_type), i, + gf_inode_type_to_str(replies[i].poststat.ia_type)); + ret = -EIO; + goto out; +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index d9a0ee3..18a0334 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -537,9 +537,11 @@ 
__afr_selfheal_data_finalize_source( + replies, AFR_DATA_TRANSACTION); + if (source < 0) { + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=data;" + "file=%s", +- this->name, uuid_utoa(inode->gfid)); ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(inode->gfid)); + return -EIO; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index b23ed6a..fc09b4c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -269,11 +269,12 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type), + priv->children[src_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=file;" + "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-" + "%d=%s;child-%d=%s;type-%d=%s", +- this->name, uuid_utoa(pargfid), bname, i, +- priv->children[i]->name, i, ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i, + gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx, + priv->children[src_idx]->name, src_idx, + gf_inode_type_to_str(replies[src_idx].poststat.ia_type)); +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index a661fcb..ba43341 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -242,9 +242,11 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + + if (!priv->metadata_splitbrain_forced_heal) { + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;" + "type=metadata;file=%s", +- this->name, uuid_utoa(inode->gfid)); ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(inode->gfid)); + return -EIO; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index c4df5d4..36640b5 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -222,13 +222,14 @@ afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies, + gf_inode_type_to_str(inode_type), + priv->children[type_idx]->name); + gf_event(EVENT_AFR_SPLIT_BRAIN, ++ "client-pid=%d;" + "subvol=%s;type=file;" + "file=<gfid:%s>/%s;count=2;" + "child-%d=%s;type-%d=%s;child-%d=%s;" + "type-%d=%s", +- this->name, uuid_utoa(pargfid), bname, i, +- priv->children[i]->name, i, +- gf_inode_type_to_str(inode_type1), type_idx, ++ this->ctx->cmd_args.client_pid, this->name, ++ uuid_utoa(pargfid), bname, i, priv->children[i]->name, ++ i, gf_inode_type_to_str(inode_type1), type_idx, + priv->children[type_idx]->name, type_idx, + gf_inode_type_to_str(inode_type)); + return -EIO; +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 04a4b2e..19eca9f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -324,6 +324,7 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + { + int ret = -1; + char glusterd_uuid_option[PATH_MAX] = {0}; ++ char client_pid[32] = {0}; + dict_t *cmdline = NULL; + + cmdline = dict_new(); +@@ -335,6 +336,15 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + if (ret < 0) + goto out; + ++ ret = snprintf(client_pid, sizeof(client_pid), "--client-pid=%d", ++ GF_CLIENT_PID_SELF_HEALD); ++ if (ret < 0) ++ goto out; ++ 
++ ret = dict_set_str(cmdline, "arg", client_pid); ++ if (ret < 0) ++ goto out; ++ + /* Pass cmdline arguments as key-value pair. The key is merely + * a carrier and is not used. Since dictionary follows LIFO the value + * should be put in reverse order*/ +-- +1.8.3.1 + diff --git a/SOURCES/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch b/SOURCES/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch new file mode 100644 index 0000000..ed912ea --- /dev/null +++ b/SOURCES/0109-glusterd-Optimize-glusterd-handshaking-code-path.patch @@ -0,0 +1,613 @@ +From aff18f761ef64d55635daa9a1d2140fe35632820 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Fri, 29 Mar 2019 11:48:32 +0530 +Subject: [PATCH 109/124] glusterd: Optimize glusterd handshaking code path + +Problem: At the time of handshaking glusterd populate volume + data in a dictionary.While no. of volumes are configured + more than 1500 glusterd takes more than 10 min to generated + the data.Due to taking more time rpc request times out and + rpc start bailing of call frames. + +Solution: To optimize the code done below changes + 1) Spawn multiple threads to populate volumes data in bulk + in separate dictionary and introduce an option + glusterd.brick-dict-thread-count to configure no. of threads + to populate volume data. + 2) Populate tier data only while volume type is tier + 3) Compare snap data only while snap_count is non zero + +> Fixes: bz#1699339 +> Change-Id: I38dc71970c049217f9d1a06fc0aaf4c26eab18f5 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry picked from commit 26a19d9da3ab5604db02d4ca02ce868fb57193a4) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22556/) + +Bug: 1652461 +Change-Id: Ia81671a7e1f173bcb32da9dc439be9e61c18bde1 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/167981 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + tests/bugs/glusterd/bug-1699339.t | 69 ++++++ + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 + + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 3 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 269 +++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 55 +++++ + xlators/mgmt/glusterd/src/glusterd.h | 10 + + 7 files changed, 362 insertions(+), 49 deletions(-) + create mode 100644 tests/bugs/glusterd/bug-1699339.t + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 6642ba0..e45db14 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_6_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -134,6 +134,8 @@ + + #define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */ + ++#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t +new file mode 100644 +index 0000000..3e950f4 +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1699339.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++. 
$(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../cluster.rc ++ ++cleanup; ++ ++NUM_VOLS=15 ++ ++ ++get_brick_base () { ++ printf "%s/vol%02d" $B0 $1 ++} ++ ++function count_up_bricks { ++ vol=$1; ++ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l ++} ++ ++create_volume () { ++ ++ local vol_name=$(printf "%s-vol%02d" $V0 $1) ++ ++ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name} ++ TEST $CLI_1 volume start $vol_name ++} ++ ++TEST launch_cluster 3 ++TEST $CLI_1 volume set all cluster.brick-multiplex on ++ ++# The option accepts the value in the range from 5 to 200 ++TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210 ++TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4 ++ ++TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5 ++ ++TEST $CLI_1 peer probe $H2; ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count ++ ++TEST $CLI_1 peer probe $H3; ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++ ++# Our infrastructure can't handle an arithmetic expression here. The formula ++# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other ++# NUM_VOLS-1 and there are 5 such statements in each iteration. ++TESTS_EXPECTED_IN_LOOP=28 ++for i in $(seq 1 $NUM_VOLS); do ++ starttime="$(date +%s)"; ++ create_volume $i ++done ++ ++TEST kill_glusterd 1 ++ ++vol1=$(printf "%s-vol%02d" $V0 1) ++TEST $CLI_2 volume set $vol1 performance.readdir-ahead on ++vol2=$(printf "%s-vol%02d" $V0 2) ++TEST $CLI_2 volume set $vol2 performance.readdir-ahead on ++ ++# Bring back 1st glusterd ++TEST $glusterd_1 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++ ++EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead ++ ++vol_name=$(printf "%s-vol%02d" $V0 2) ++EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead ++ ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 95f9707..94a5e1f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { + * TBD: Discuss the default value for this. 
Maybe this should be a + * dynamic value depending on the memory specifications per node */ + {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, ++ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE}, + /*{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},*/ + {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, + {NULL}, +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index b3c4158..d225854 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername, + goto out; + } + ++ if (!snap_count) ++ goto out; ++ + for (i = 1; i <= snap_count; i++) { + /* Compare one snapshot from peer_data at a time */ + ret = glusterd_compare_snap(peer_data, i, peername, peerid); +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index fdd7d91..ff6102b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -155,6 +155,47 @@ out: + return ret; + } + ++int ++get_gd_vol_thread_limit(int *thread_limit) ++{ ++ char *value = NULL; ++ int ret = -1; ++ int vol_per_thread_limit = 0; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ if (!is_brick_mx_enabled()) { ++ vol_per_thread_limit = 1; ++ ret = 0; ++ goto out; ++ } ++ ++ ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD, ++ SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value); ++ if (ret) { ++ value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE; ++ } ++ ret = gf_string2int(value, &vol_per_thread_limit); ++ if (ret) ++ goto out; ++ ++out: ++ *thread_limit = vol_per_thread_limit; ++ ++ gf_msg_debug("glusterd", 0, ++ "Per Thread volume limit set to %d glusterd to populate dict " ++ "data parallel", ++ *thread_limit); ++ ++ return ret; ++} ++ + extern struct volopt_map_entry glusterd_volopt_map[]; + extern glusterd_all_vol_opts valid_all_vol_opts[]; + +@@ -3070,50 +3111,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, + + /* tiering related variables */ + +- snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); +- if (ret) +- goto out; ++ if (volinfo->type == GF_CLUSTER_TYPE_TIER) { ++ snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count); +- if (ret) +- 
goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); ++ ret = dict_set_uint32(dict, key, ++ volinfo->tier_info.cold_disperse_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); ++ ret = dict_set_uint32(dict, key, ++ volinfo->tier_info.cold_redundancy_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); ++ ret = dict_set_uint32(dict, key, ++ volinfo->tier_info.cold_dist_leaf_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); ++ if (ret) ++ goto out; + +- snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); +- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); +- if (ret) +- goto out; ++ snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); ++ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); ++ if (ret) ++ goto out; ++ } + + snprintf(key, sizeof(key), "%s%d", prefix, count); + ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo); +@@ -3363,33 +3409,40 @@ out: + return ret; + } + +-int32_t +-glusterd_add_volumes_to_export_dict(dict_t **peer_data) ++void * ++glusterd_add_bulk_volumes_create_thread(void *data) + { + int32_t ret = -1; +- dict_t *dict = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t count = 0; +- glusterd_dict_ctx_t ctx = {0}; + xlator_t *this = NULL; ++ glusterd_add_dict_args_t *arg = NULL; ++ dict_t *dict = NULL; ++ int start = 0; ++ int end = 0; + +- this = THIS; +- GF_ASSERT(this); ++ GF_ASSERT(data); ++ ++ arg = data; ++ dict = arg->voldict; ++ start = arg->start; ++ end = arg->end; ++ this = arg->this; ++ THIS = arg->this; + priv = this->private; + GF_ASSERT(priv); + +- dict = dict_new(); +- if (!dict) +- goto out; +- + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; ++ if ((count < start) || (count > end)) ++ continue; ++ + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); + if (ret) + goto out; +- if (!glusterd_is_volume_quota_enabled(volinfo)) ++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, + "volume"); +@@ -3397,7 +3450,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + goto out; + } + +- ret = dict_set_int32n(dict, "count", SLEN("count"), count); ++out: ++ GF_ATOMIC_DEC(priv->thread_count); ++ free(arg); ++ return NULL; ++} ++ ++int32_t ++glusterd_add_volumes_to_export_dict(dict_t **peer_data) ++{ ++ int32_t ret = -1; ++ dict_t 
*dict = NULL; ++ dict_t *dict_arr[128] = { ++ 0, ++ }; ++ glusterd_conf_t *priv = NULL; ++ glusterd_volinfo_t *volinfo = NULL; ++ int32_t count = 0; ++ glusterd_dict_ctx_t ctx = {0}; ++ xlator_t *this = NULL; ++ int totthread = 0; ++ int volcnt = 0; ++ int start = 1; ++ int endindex = 0; ++ int vol_per_thread_limit = 0; ++ glusterd_add_dict_args_t *arg = NULL; ++ pthread_t th_id = { ++ 0, ++ }; ++ int th_ret = 0; ++ int i = 0; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); ++ ++ dict = dict_new(); ++ if (!dict) ++ goto out; ++ ++ /* Count the total number of volumes */ ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; ++ ++ get_gd_vol_thread_limit(&vol_per_thread_limit); ++ ++ if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) { ++ totthread = 0; ++ } else { ++ totthread = volcnt / vol_per_thread_limit; ++ endindex = volcnt % vol_per_thread_limit; ++ if (endindex) ++ totthread++; ++ } ++ ++ if (totthread == 0) { ++ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) ++ { ++ count++; ++ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); ++ if (ret) ++ goto out; ++ ++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) ++ continue; ++ ++ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, ++ "volume"); ++ if (ret) ++ goto out; ++ } ++ } else { ++ for (i = 0; i < totthread; i++) { ++ arg = calloc(1, sizeof(*arg)); ++ dict_arr[i] = dict_new(); ++ arg->this = this; ++ arg->voldict = dict_arr[i]; ++ arg->start = start; ++ if (!endindex) { ++ arg->end = ((i + 1) * vol_per_thread_limit); ++ } else { ++ arg->end = (start + endindex); ++ } ++ th_ret = gf_thread_create_detached( ++ &th_id, glusterd_add_bulk_volumes_create_thread, arg, ++ "bulkvoldict"); ++ if (th_ret) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "glusterd_add_bulk_volume %s" ++ " thread creation failed", ++ "bulkvoldict"); ++ free(arg); ++ goto out; ++ } ++ ++ start = start + vol_per_thread_limit; ++ GF_ATOMIC_INC(priv->thread_count); ++ gf_log(this->name, GF_LOG_INFO, ++ "Create thread %d to populate dict data for volume" ++ " start index is %d end index is %d", ++ (i + 1), arg->start, arg->end); ++ } ++ while (GF_ATOMIC_GET(priv->thread_count)) { ++ sleep(1); ++ } ++ ++ gf_log(this->name, GF_LOG_INFO, ++ "Finished dictionary popluation in all threads"); ++ for (i = 0; i < totthread; i++) { ++ dict_copy_with_ref(dict_arr[i], dict); ++ dict_unref(dict_arr[i]); ++ } ++ gf_log(this->name, GF_LOG_INFO, ++ "Finished merger of all dictionraies into single one"); ++ } ++ ++ ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt); + if (ret) + goto out; + +@@ -3499,6 +3667,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + ++ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) ++ goto skip_quota; ++ + snprintf(key, sizeof(key), "volume%d.quota-version", count); + ret = dict_get_uint32(peer_data, key, "a_version); + if (ret) { +@@ -3550,6 +3721,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + } ++ ++skip_quota: + *status = GLUSTERD_VOL_COMP_SCS; + + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 42ca9bb..10aa2ae 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1058,6 +1058,51 @@ out: + } + + static int ++validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict, ++ char *key, char 
*value, char **op_errstr) ++{ ++ xlator_t *this = NULL; ++ uint val = 0; ++ int ret = -1; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ if (!is_brick_mx_enabled()) { ++ gf_asprintf(op_errstr, ++ "Brick-multiplexing is not enabled. " ++ "Please enable brick multiplexing before trying " ++ "to set this option."); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s", ++ *op_errstr); ++ goto out; ++ } ++ ++ ret = gf_string2uint(value, &val); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "%s is not a valid count. " ++ "%s expects an unsigned integer.", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", ++ *op_errstr); ++ } ++ ++ if ((val < 5) || (val > 200)) { ++ gf_asprintf( ++ op_errstr, ++ "Please set this option to a greater than 5 or less than 200 " ++ "to optimize dict generated while no. of volumes are more"); ++ ret = -1; ++ goto out; ++ } ++out: ++ gf_msg_debug("glusterd", 0, "Returning %d", ret); ++ ++ return ret; ++} ++ ++static int + validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) + { +@@ -3520,6 +3565,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "brick multiplexing. Brick multiplexing ensures that " + "compatible brick instances can share one single " + "brick process."}, ++ {.key = GLUSTERD_VOL_CNT_PER_THRD, ++ .voltype = "mgmt/glusterd", ++ .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE, ++ .op_version = GD_OP_VERSION_7_0, ++ .validate_fn = validate_volume_per_thread_limit, ++ .type = GLOBAL_NO_DOC, ++ .description = ++ "This option can be used to limit the number of volumes " ++ "handled by per thread to populate peer data.The option accepts " ++ " the value in the range of 5 to 200"}, + {.key = GLUSTERD_BRICKMUX_LIMIT_KEY, + .voltype = "mgmt/glusterd", + .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 0ac6e63..bd9f509 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -57,8 +57,10 @@ + #define GLUSTER_SHARED_STORAGE "gluster_shared_storage" + #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" + #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" ++#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread" + #define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" + #define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250" ++#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100" + #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging" + #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" + +@@ -225,8 +227,16 @@ typedef struct { + which might lead the modification of volinfo + list. 
+ */ ++ gf_atomic_t thread_count; + } glusterd_conf_t; + ++typedef struct glusterd_add_dict_args { ++ xlator_t *this; ++ dict_t *voldict; ++ int start; ++ int end; ++} glusterd_add_dict_args_t; ++ + typedef enum gf_brick_status { + GF_BRICK_STOPPED, + GF_BRICK_STARTED, +-- +1.8.3.1 + diff --git a/SOURCES/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch b/SOURCES/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch new file mode 100644 index 0000000..eedac5e --- /dev/null +++ b/SOURCES/0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch @@ -0,0 +1,108 @@ +From 6e7d333625ecd9f7402c2e839338350fa86eaf45 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Tue, 16 Apr 2019 17:07:37 +0530 +Subject: [PATCH 110/124] tier/shd/glusterd: with shd mux, the shd volfile path + have to be updated for tier-heald.t + +The volfile path for glustershd has been changed to volume based +from node based with the shd mux. And those changes for the +tier-heald.t test case have been made in this patch. + +label: DOWNSTREAM ONLY + +Change-Id: I0137f7e02c2bf3721dd51c6dfb215cd81b31d6ef +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168038 +Reviewed-by: Rafi Kavungal Chundattu Parambil <rkavunga@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/tier/tier-heald.t | 35 ++++++++++++++++++++--------------- + 1 file changed, 20 insertions(+), 15 deletions(-) + +diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t +index a8e634f..0ec9e43 100644 +--- a/tests/basic/tier/tier-heald.t ++++ b/tests/basic/tier/tier-heald.t +@@ -11,7 +11,7 @@ cleanup; + TEST glusterd + TEST pidof glusterd + +-volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" ++r2_volfile=$(gluster system:: getwd)"/vols/r2/r2-shd.vol" + + # Commands should fail when both tiers are not of distribute type. + # Glustershd shouldn't be running as long as there are no replicate/disperse +@@ -34,51 +34,56 @@ TEST $CLI volume tier r2 attach $H0:$B0/r2_hot + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 enable + EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "enable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 disable + EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "disable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + # Commands should work on disperse volume. 
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 + TEST $CLI volume start ec2 + ++ec2_volfile=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol" ++ + TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4} + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 enable + EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "enable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 disable + EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "disable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + + #Check that shd graph is rewritten correctly on volume stop/start +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop r2 +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++ ++# Has been commented as the validations after stop using volfile dont hold true. ++#EXPECT "N" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop ec2 + # When both the volumes are stopped glustershd volfile is not modified just the + # process is stopped + TEST "[ -z $(get_shd_process_pid) ]" + + TEST $CLI volume start r2 +-EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate ++# Has been commented as the validations after stop using volfile dont hold true. 
++#EXPECT "N" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate + + TEST $CLI volume start ec2 + +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate + + TEST $CLI volume tier ec2 detach force + +-EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate + + TEST $CLI volume set r2 self-heal-daemon on + TEST $CLI volume set r2 cluster.self-heal-daemon off +-- +1.8.3.1 + diff --git a/SOURCES/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch b/SOURCES/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch new file mode 100644 index 0000000..90a25b3 --- /dev/null +++ b/SOURCES/0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch @@ -0,0 +1,49 @@ +From 310e09d46cdb293e4af2df0085b8ac45d5c17933 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Tue, 16 Apr 2019 17:20:34 +0530 +Subject: [PATCH 111/124] glusterd: fix loading ctime in client graph logic + +Commit efbf8ab wasn't handling all the scenarios of toggling ctime +option correctly and more over a ! had completely tossed up the logic. + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22578/ + +>Fixes: bz#1697907 +>Change-Id: If12e2f69045e59878992ee2cd0518cc0eabcce0d +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1697820 +Change-Id: If12e2f69045e59878992ee2cd0518cc0eabcce0d +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168048 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 324ec2f..da877aa 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4358,9 +4358,15 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + goto out; + } + } +- +- if (conf->op_version >= GD_OP_VERSION_5_0 && +- !dict_get_str_boolean(set_dict, "features.ctime", _gf_false)) { ++ /* a. ret will be -1 if features.ctime is not set in the volinfo->dict which ++ * means ctime should be loaded into the graph. ++ * b. ret will be 1 if features.ctime is explicitly turned on through ++ * volume set and in that case ctime should be loaded into the graph. ++ * c. ret will be 0 if features.ctime is explicitly turned off and in that ++ * case ctime shouldn't be loaded into the graph. 
++ */ ++ ret = dict_get_str_boolean(set_dict, "features.ctime", -1); ++ if (conf->op_version >= GD_OP_VERSION_5_0 && ret) { + xl = volgen_graph_add(graph, "features/utime", volname); + if (!xl) { + ret = -1; +-- +1.8.3.1 + diff --git a/SOURCES/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch b/SOURCES/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch new file mode 100644 index 0000000..ddcb82c --- /dev/null +++ b/SOURCES/0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch @@ -0,0 +1,45 @@ +From 1df830953b9a09404f9ca6a0539172e9f23ecbf4 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Wed, 17 Apr 2019 15:13:12 +0530 +Subject: [PATCH 112/124] geo-rep : fix incorrectly formatted authorized_keys + +Problem : While Geo-rep setup when creating an ssh authorized_keys + the geo-rep setup inserts an extra space before the "ssh-rsa" label. + This gets flagged by an enterprise customer's security scan as a + security violation. + +Solution: Remove extra space in GSYNCD_CMD & TAR_CMD. + +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22246/ +>Change-Id: I956f938faef0e0883703bbc337b1dc2770e4a921 +>fixes: bz#1679401 +>Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1671862 +Change-Id: I194a2bddcf2ee9b8286b204f8c4da5c480a528b3 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168144 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/src/peer_georep-sshkey.py.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in +index 2196fd7..58696e9 100644 +--- a/geo-replication/src/peer_georep-sshkey.py.in ++++ b/geo-replication/src/peer_georep-sshkey.py.in +@@ -30,8 +30,8 @@ from prettytable import PrettyTable + + SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem" + TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem" +-GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" ' +-TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" ' ++GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" ' ++TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" ' + COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub" + + +-- +1.8.3.1 + diff --git a/SOURCES/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch b/SOURCES/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch new file mode 100644 index 0000000..9f53a1e --- /dev/null +++ b/SOURCES/0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch @@ -0,0 +1,71 @@ +From 850d5418fb48417d94ab17e565b2184ba951ccbe Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Wed, 17 Apr 2019 18:04:44 +0530 +Subject: [PATCH 113/124] spec: Glusterd did not start by default after node + reboot + +Problem: After install gluster rpms glusterd service is not enabled + so systemctl status is showing "disabled" + +Solution: Update glusterfs.spec.in to enable glusterd after install + gluster rpms + +label: DOWNSTREAM ONLY +BUG: 1699835 + +Change-Id: Ied9be5dfb1bf3bda24868722b1fbd77cb1c1d18c +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168168 +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 10 
++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index cb17eaa..ba095b7 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -168,6 +168,8 @@ + %endif + + %if ( 0%{?_with_systemd:1} ) ++%global service_enable() /bin/systemctl --quiet enable %1.service || : \ ++%{nil} + %global service_start() /bin/systemctl --quiet start %1.service || : \ + %{nil} + %global service_stop() /bin/systemctl --quiet stop %1.service || :\ +@@ -181,7 +183,7 @@ + %global glustereventsd_svcfile %{_unitdir}/glustereventsd.service + %global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service + %else +-%global systemd_post() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \ ++%global service_enable() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \ + %{nil} + %global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \ + %{nil} +@@ -926,7 +928,7 @@ exit 0 + + %if ( 0%{!?_without_events:1} ) + %post events +-%systemd_post glustereventsd ++%service_enable glustereventsd + %endif + + %if ( 0%{!?_without_server:1} ) +@@ -951,9 +953,9 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %post server + # Legacy server +-%systemd_post glusterd ++%service_enable glusterd + %if ( 0%{_for_fedora_koji_builds} ) +-%systemd_post glusterfsd ++%service_enable glusterfsd + %endif + # ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7 . + # While upgrading glusterfs-server package form GlusterFS version <= 3.6 to +-- +1.8.3.1 + diff --git a/SOURCES/0114-core-fix-hang-issue-in-__gf_free.patch b/SOURCES/0114-core-fix-hang-issue-in-__gf_free.patch new file mode 100644 index 0000000..7e26642 --- /dev/null +++ b/SOURCES/0114-core-fix-hang-issue-in-__gf_free.patch @@ -0,0 +1,46 @@ +From da53d9027d9426c0023176a42e0550d6ccccc941 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Mon, 22 Apr 2019 21:18:30 +0530 +Subject: [PATCH 114/124] core: fix hang issue in __gf_free + +Currently GF_ASSERT is done under mem_accounting lock at some places. +On a GF_ASSERT failure, gf_msg_callingfn is called which calls gf_malloc +internally and it takes the same mem_accounting lock leading to deadlock. + +This is a temporary fix to avoid any hang issue in master. +https://review.gluster.org/#/c/glusterfs/+/22589/ is being worked on +in the mean while so that GF_ASSERT can be used under mem_accounting +lock. 
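The cycle described above can be sketched with any non-reentrant lock; a small
Python stand-in (the names are placeholders, not the actual gluster code
paths):

    import threading

    mem_lock = threading.Lock()  # stand-in for the mem_accounting lock

    def emit_log(msg):
        # Plays the role of gf_msg_callingfn(): logging allocates memory,
        # and the allocator takes the accounting lock again.
        if mem_lock.acquire(timeout=1):  # a real blocking acquire hangs forever
            mem_lock.release()
        else:
            print("deadlock:", msg)

    with mem_lock:                    # __gf_free() already holds the lock...
        emit_log("GF_ASSERT failed")  # ...so the logging path blocks on it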
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22600/ + +>Change-Id: I6d67f23979e7edd2695bdc6aab2997dae4a4060a +>updates: bz#1700865 +>Signed-off-by: Susant Palai <spalai@redhat.com> + +Change-Id: I6d67f23979e7edd2695bdc6aab2997dae4a4060a +BUG: 1698728 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168474 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Nithya Balachandran <nbalacha@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/mem-pool.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 81badc0..34cb87a 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -348,7 +348,6 @@ __gf_free(void *free_ptr) + + LOCK(&mem_acct->rec[header->type].lock); + { +- GF_ASSERT(mem_acct->rec[header->type].size >= header->size); + mem_acct->rec[header->type].size -= header->size; + mem_acct->rec[header->type].num_allocs--; + /* If all the instances are freed up then ensure typestr is set +-- +1.8.3.1 + diff --git a/SOURCES/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch b/SOURCES/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch new file mode 100644 index 0000000..b53ff91 --- /dev/null +++ b/SOURCES/0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch @@ -0,0 +1,56 @@ +From 4901fcc0cc507accf30e1a4bdd020a5676488751 Mon Sep 17 00:00:00 2001 +From: Niels de Vos <ndevos@redhat.com> +Date: Mon, 8 Apr 2019 12:14:34 +0200 +Subject: [PATCH 115/124] core: only log seek errors if SEEK_HOLE/SEEK_DATA is + available + +On RHEL-6 there is no support for SEEK_HOLE/SEEK_DATA and this causes +the POSIX xlator to return errno=EINVAL. Because of this, the rpc-server +xlator will log all 'failed' seek attempts. When applications call +seek() often, the brick logs can grow very quickly and fill up the +disks. + +Messages that get logged are like +[server-rpc-fops.c:2091:server_seek_cbk] 0-vol01-server: 4947: SEEK-2 (53920aee-062c-4598-aa50-2b4d7821b204), client: worker.example.com-7808-2019/02/08-18:04:57:903430-vol01-client-0-0-0, error-xlator: vol01-posix [Invalid argument] + +The problem can be reproduced by running a Gluster Server on RHEL-6, +with a client running on RHEL-7. The client should execute an +application that calls lseek() with SEEK_HOLE/SEEK_DATA. 
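The client-side trigger can be reproduced with a few lines of Python (the
mount path here is hypothetical):

    import errno, os

    fd = os.open("/mnt/vol01/datafile", os.O_RDONLY)  # hypothetical FUSE mount
    try:
        # ENXIO only means "no data past this offset" and is kept at DEBUG;
        # a brick built without SEEK_HOLE/SEEK_DATA support fails the fop
        # with EINVAL instead, which was previously logged as an error.
        offset = os.lseek(fd, 0, os.SEEK_DATA)
    except OSError as e:
        assert e.errno in (errno.ENXIO, errno.EINVAL)
    finally:
        os.close(fd)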
+ +>Change-Id: I7b6c16f8e0ba1a183e845cfdb8d5a3f8caeab138 +>Fixes: bz#1697316 +>Signed-off-by: Niels de Vos <ndevos@redhat.com> + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22526/ + +BUG: 1696903 +Change-Id: I7b6c16f8e0ba1a183e845cfdb8d5a3f8caeab138 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168527 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + libglusterfs/src/common-utils.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c +index a0c83c0..70d5d21 100644 +--- a/libglusterfs/src/common-utils.c ++++ b/libglusterfs/src/common-utils.c +@@ -4500,9 +4500,13 @@ fop_log_level(glusterfs_fop_t fop, int op_errno) + return GF_LOG_DEBUG; + + if (fop == GF_FOP_SEEK) { ++#ifdef HAVE_SEEK_HOLE + if (op_errno == ENXIO) { + return GF_LOG_DEBUG; + } ++#else ++ return GF_LOG_DEBUG; ++#endif + } + + return GF_LOG_ERROR; +-- +1.8.3.1 + diff --git a/SOURCES/0116-cluster-ec-fix-fd-reopen.patch b/SOURCES/0116-cluster-ec-fix-fd-reopen.patch new file mode 100644 index 0000000..5426c70 --- /dev/null +++ b/SOURCES/0116-cluster-ec-fix-fd-reopen.patch @@ -0,0 +1,1931 @@ +From e33b3e0a443d4a54634a664f2d499a3fce9e7fb4 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Tue, 16 Apr 2019 14:19:47 +0530 +Subject: [PATCH 116/124] cluster/ec: fix fd reopen + +Currently EC tries to reopen fd's that have been opened while a brick +was down. This is done as part of regular write operations, just after +having acquired the locks, and it's sent as a sub-fop of the main write +fop. + +There were two problems: + +1. The reopen was attempted on all UP bricks, even if a previous lock +didn't succeed. This is incorrect because most probably the open will +fail. + +2. If reopen is sent and fails, the error is propagated to the main +operation, causing it to fail when it shouldn't. + +To fix this, we only attempt reopens on bricks where the current fop +owns a lock, and we prevent any error to be propagated to the main +fop. + +To implement this behaviour an argument used to indicate the minimum +number of required answers has overloaded to also include some flags. To +make the change consistent, it has been necessary to rename the +argument, which means that a lot of files have been changed. However +there are no functional changes. + +This change has also uncovered a problem in discard code, which didn't +correctely process requests of small sizes because no real discard fop +was being processed, only a write of 0's on some region. In this case +some fields of the fop remained uninitialized or with incorrect values. +To fix this, a new function has been created to simulate success on a +fop and it's used in the discard case. + +Thanks to Pranith for providing a test script that has also detected an +issue in this patch. This patch includes a small modification of this +script to force data to be written into bricks before stopping them. 
+ +Upstream patch: https://review.gluster.org/22574 +> Change-Id: I7ccd1fc5fc134eeb6d443c755962a20819320d48 +> BUG: bz#1699866 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +Upstream patch: https://review.gluster.org/22558 +> Change-Id: If272343873369186c2fb8f43c1d9c52c3ea304ec +> BUG: bz#1699866 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: If272343873369186c2fb8f43c1d9c52c3ea304ec +Fixes: bz#1663375 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168522 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> +--- + tests/basic/ec/self-heal-read-write-fail.t | 69 +++++++++++++ + tests/bugs/ec/bug-1699866-check-reopen-fd.t | 34 +++++++ + xlators/cluster/ec/src/ec-common.c | 73 ++++++++++---- + xlators/cluster/ec/src/ec-common.h | 14 ++- + xlators/cluster/ec/src/ec-data.c | 7 +- + xlators/cluster/ec/src/ec-data.h | 2 +- + xlators/cluster/ec/src/ec-dir-read.c | 12 +-- + xlators/cluster/ec/src/ec-dir-write.c | 52 +++++----- + xlators/cluster/ec/src/ec-fops.h | 144 ++++++++++++++-------------- + xlators/cluster/ec/src/ec-generic.c | 54 ++++++----- + xlators/cluster/ec/src/ec-heal.c | 20 ++-- + xlators/cluster/ec/src/ec-inode-read.c | 58 +++++------ + xlators/cluster/ec/src/ec-inode-write.c | 74 +++++++------- + xlators/cluster/ec/src/ec-locks.c | 36 +++---- + xlators/cluster/ec/src/ec-types.h | 11 ++- + xlators/cluster/ec/src/ec.c | 45 +++++---- + 16 files changed, 431 insertions(+), 274 deletions(-) + create mode 100644 tests/basic/ec/self-heal-read-write-fail.t + create mode 100644 tests/bugs/ec/bug-1699866-check-reopen-fd.t + +diff --git a/tests/basic/ec/self-heal-read-write-fail.t b/tests/basic/ec/self-heal-read-write-fail.t +new file mode 100644 +index 0000000..0ba591b +--- /dev/null ++++ b/tests/basic/ec/self-heal-read-write-fail.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++#This test verifies that self-heal fails when read/write fails as part of heal ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info ++ ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++TEST touch $M0/a ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++echo abc >> $M0/a ++ ++# Umount the volume to force all pending writes to reach the bricks ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++#Load error-gen and fail read fop and test that heal fails ++TEST $CLI volume stop $V0 #Stop volume so that error-gen can be loaded ++TEST $CLI volume set $V0 debug.error-gen posix ++TEST $CLI volume set $V0 debug.error-fops read ++TEST $CLI volume set $V0 debug.error-number EBADF ++TEST $CLI volume set $V0 debug.error-failure 100 ++ ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST ! 
getfattr -n trusted.ec.heal $M0/a ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++#fail write fop and test that heal fails ++TEST $CLI volume stop $V0 ++TEST $CLI volume set $V0 debug.error-fops write ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST ! getfattr -n trusted.ec.heal $M0/a ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++TEST $CLI volume stop $V0 #Stop volume so that error-gen can be disabled ++TEST $CLI volume reset $V0 debug.error-gen ++TEST $CLI volume reset $V0 debug.error-fops ++TEST $CLI volume reset $V0 debug.error-number ++TEST $CLI volume reset $V0 debug.error-failure ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++TEST getfattr -n trusted.ec.heal $M0/a ++EXPECT "^0$" get_pending_heal_count $V0 ++ ++#Test that heal worked as expected by forcing read from brick0 ++#remount to make sure data is not served from any cache ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++EXPECT "abc" cat $M0/a ++ ++cleanup +diff --git a/tests/bugs/ec/bug-1699866-check-reopen-fd.t b/tests/bugs/ec/bug-1699866-check-reopen-fd.t +new file mode 100644 +index 0000000..4386d01 +--- /dev/null ++++ b/tests/bugs/ec/bug-1699866-check-reopen-fd.t +@@ -0,0 +1,34 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fileio.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 disperse.background-heals 0 ++TEST $CLI volume set $V0 write-behind off ++TEST $CLI volume set $V0 open-behind off ++TEST $CLI volume start $V0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++TEST mkdir -p $M0/dir ++ ++fd="$(fd_available)" ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0 ++ ++TEST fd_open ${fd} rw $M0/dir/test ++TEST fd_write ${fd} "test1" ++TEST $CLI volume replace-brick ${V0} $H0:$B0/${V0}0 $H0:$B0/${V0}0_1 commit force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++TEST fd_write ${fd} "test2" ++TEST fd_close ${fd} ++ ++cleanup +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 5183680..1454ae2 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -44,16 +44,16 @@ ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status) + UNLOCK(&fd->lock); + } + +-static int +-ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) ++static uintptr_t ++ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask) + { + int i = 0; + int count = 0; + ec_t *ec = NULL; + ec_fd_t *fd_ctx = NULL; ++ uintptr_t need_open = 0; + + ec = this->private; +- *need_open = 0; + + fd_ctx = ec_fd_get(fd, this); + if (!fd_ctx) +@@ -63,9 +63,9 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) + { + for (i = 0; i < ec->nodes; i++) { + if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) && +- (ec->xl_up & (1 << i))) { ++ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) { + fd_ctx->fd_status[i] = 
EC_FD_OPENING; +- *need_open |= (1 << i); ++ need_open |= (1 << i); + count++; + } + } +@@ -76,10 +76,11 @@ ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t *need_open) + * then ignore fixing the fd as it has been + * requested from heal operation. + */ +- if (count >= ec->fragments) +- count = 0; ++ if (count >= ec->fragments) { ++ need_open = 0; ++ } + +- return count; ++ return need_open; + } + + static gf_boolean_t +@@ -96,9 +97,8 @@ ec_is_fd_fixable(fd_t *fd) + } + + static void +-ec_fix_open(ec_fop_data_t *fop) ++ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + { +- int call_count = 0; + uintptr_t need_open = 0; + int ret = 0; + loc_t loc = { +@@ -109,9 +109,10 @@ ec_fix_open(ec_fop_data_t *fop) + goto out; + + /* Evaluate how many remote fd's to be opened */ +- call_count = ec_fd_ctx_need_open(fop->fd, fop->xl, &need_open); +- if (!call_count) ++ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask); ++ if (need_open == 0) { + goto out; ++ } + + loc.inode = inode_ref(fop->fd->inode); + gf_uuid_copy(loc.gfid, fop->fd->inode->gfid); +@@ -121,11 +122,13 @@ ec_fix_open(ec_fop_data_t *fop) + } + + if (IA_IFDIR == fop->fd->inode->ia_type) { +- ec_opendir(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL, ++ ec_opendir(fop->frame, fop->xl, need_open, ++ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, + &fop->loc[0], fop->fd, NULL); + } else { +- ec_open(fop->frame, fop->xl, need_open, EC_MINIMUM_ONE, NULL, NULL, +- &loc, fop->fd->flags, fop->fd, NULL); ++ ec_open(fop->frame, fop->xl, need_open, ++ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc, ++ fop->fd->flags, fop->fd, NULL); + } + + out: +@@ -495,12 +498,16 @@ ec_resume(ec_fop_data_t *fop, int32_t error) + } + + void +-ec_resume_parent(ec_fop_data_t *fop, int32_t error) ++ec_resume_parent(ec_fop_data_t *fop) + { + ec_fop_data_t *parent; ++ int32_t error = 0; + + parent = fop->parent; + if (parent != NULL) { ++ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) { ++ error = fop->error; ++ } + ec_trace("RESUME_PARENT", fop, "error=%u", error); + fop->parent = NULL; + ec_resume(parent, error); +@@ -593,6 +600,8 @@ ec_internal_op(ec_fop_data_t *fop) + return _gf_true; + if (fop->id == GF_FOP_FXATTROP) + return _gf_true; ++ if (fop->id == GF_FOP_OPEN) ++ return _gf_true; + return _gf_false; + } + +@@ -631,7 +640,7 @@ ec_msg_str(ec_fop_data_t *fop) + return fop->errstr; + } + +-int32_t ++static int32_t + ec_child_select(ec_fop_data_t *fop) + { + ec_t *ec = fop->xl->private; +@@ -693,8 +702,6 @@ ec_child_select(ec_fop_data_t *fop) + return 0; + } + +- ec_sleep(fop); +- + return 1; + } + +@@ -773,6 +780,8 @@ ec_dispatch_one(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = 1; + fop->first = ec_select_first_by_read_policy(fop->xl->private, fop); + +@@ -807,6 +816,8 @@ ec_dispatch_inc(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = gf_bits_count(fop->remaining); + fop->first = 0; + +@@ -820,6 +831,8 @@ ec_dispatch_all(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = gf_bits_count(fop->remaining); + fop->first = 0; + +@@ -838,6 +851,8 @@ ec_dispatch_min(ec_fop_data_t *fop) + ec_dispatch_start(fop); + + if (ec_child_select(fop)) { ++ ec_sleep(fop); ++ + fop->expected = count = ec->fragments; + fop->first = ec_select_first_by_read_policy(fop->xl->private, fop); + idx = fop->first - 1; +@@ -852,6 +867,23 
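
The two hunks above turn ec_fd_ctx_need_open() from a count-plus-out-parameter helper into one that directly returns the bitmask of bricks whose fd must be reopened, and they add a second filter: a brick is only considered when it is both up (ec->xl_up) and selected by the caller-supplied mask. Below is a minimal, self-contained sketch of that computation, using simplified stand-in types rather than the real GlusterFS structures; it keeps the back-off that returns 0 when every fragment would need a reopen, the case the patch assumes was requested by a heal.

/* Simplified stand-ins, not the real GlusterFS structures. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum fd_state { FD_OPENED, FD_NOT_OPENED, FD_OPENING };

/* Return a bitmask of bricks whose fd must be reopened: the brick must
 * be up, must be selected by the current mask, and must not already
 * have an open fd.  If every data fragment would need a reopen, assume
 * a heal asked for it and do nothing (return 0). */
static uintptr_t
need_open_mask(enum fd_state *status, int nodes, int fragments,
               uintptr_t xl_up, uintptr_t mask)
{
    uintptr_t need_open = 0;
    int count = 0;

    for (int i = 0; i < nodes; i++) {
        if ((status[i] == FD_NOT_OPENED) &&
            ((xl_up & (1ULL << i)) != 0) && ((mask & (1ULL << i)) != 0)) {
            status[i] = FD_OPENING;
            need_open |= (1ULL << i);
            count++;
        }
    }
    return (count >= fragments) ? 0 : need_open;
}

int
main(void)
{
    /* 4+2 volume: brick 0 was down when the fd was first opened. */
    enum fd_state st[6] = {FD_NOT_OPENED, FD_OPENED, FD_OPENED,
                           FD_OPENED,     FD_OPENED, FD_OPENED};

    assert(need_open_mask(st, 6, 4, 0x3F, 0x3F) == 0x01);
    puts("only brick 0 needs a reopen");
    return 0;
}

In the patch itself the returned bitmask feeds straight into the ec_open()/ec_opendir() calls inside ec_fix_open().
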
@@ ec_dispatch_min(ec_fop_data_t *fop) + } + } + ++void ++ec_succeed_all(ec_fop_data_t *fop) ++{ ++ ec_dispatch_start(fop); ++ ++ if (ec_child_select(fop)) { ++ fop->expected = gf_bits_count(fop->remaining); ++ fop->first = 0; ++ ++ /* Simulate a successful execution on all bricks */ ++ ec_trace("SUCCEED", fop, ""); ++ ++ fop->good = fop->remaining; ++ fop->remaining = 0; ++ } ++} ++ + ec_lock_t * + ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc) + { +@@ -1825,7 +1857,8 @@ ec_lock_acquired(ec_lock_link_t *link) + + if (fop->use_fd && + (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) { +- ec_fix_open(fop); ++ /* Try to reopen closed fd's only if lock has succeeded. */ ++ ec_fix_open(fop, lock->mask); + } + + ec_lock_resume_shared(&list); +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index 54aaa77..e948342 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -54,9 +54,12 @@ enum _ec_xattrop_flags { + + #define EC_SELFHEAL_BIT 62 + +-#define EC_MINIMUM_ONE -1 +-#define EC_MINIMUM_MIN -2 +-#define EC_MINIMUM_ALL -3 ++#define EC_MINIMUM_ONE (1 << 6) ++#define EC_MINIMUM_MIN (2 << 6) ++#define EC_MINIMUM_ALL (3 << 6) ++#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8) ++#define EC_FOP_MINIMUM(_flags) ((_flags)&255) ++#define EC_FOP_FLAGS(_flags) ((_flags) & ~255) + + #define EC_UPDATE_DATA 1 + #define EC_UPDATE_META 2 +@@ -163,11 +166,14 @@ void + ec_dispatch_one(ec_fop_data_t *fop); + + void ++ec_succeed_all(ec_fop_data_t *fop); ++ ++void + ec_sleep(ec_fop_data_t *fop); + void + ec_resume(ec_fop_data_t *fop, int32_t error); + void +-ec_resume_parent(ec_fop_data_t *fop, int32_t error); ++ec_resume_parent(ec_fop_data_t *fop); + + void + ec_manager(ec_fop_data_t *fop, int32_t error); +diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c +index fae8843..6ef9340 100644 +--- a/xlators/cluster/ec/src/ec-data.c ++++ b/xlators/cluster/ec/src/ec-data.c +@@ -98,7 +98,7 @@ ec_cbk_data_destroy(ec_cbk_data_t *cbk) + + ec_fop_data_t * + ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, +- uint32_t flags, uintptr_t target, int32_t minimum, ++ uint32_t flags, uintptr_t target, uint32_t fop_flags, + ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks, + void *data) + { +@@ -151,7 +151,8 @@ ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, + fop->refs = 1; + + fop->flags = flags; +- fop->minimum = minimum; ++ fop->minimum = EC_FOP_MINIMUM(fop_flags); ++ fop->fop_flags = EC_FOP_FLAGS(fop_flags); + fop->mask = target; + + fop->wind = wind; +@@ -271,7 +272,7 @@ ec_fop_data_release(ec_fop_data_t *fop) + loc_wipe(&fop->loc[1]); + GF_FREE(fop->errstr); + +- ec_resume_parent(fop, fop->error); ++ ec_resume_parent(fop); + + ec_fop_cleanup(fop); + +diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h +index 112536d..c8a74ff 100644 +--- a/xlators/cluster/ec/src/ec-data.h ++++ b/xlators/cluster/ec/src/ec-data.h +@@ -18,7 +18,7 @@ ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop, + int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno); + ec_fop_data_t * + ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id, +- uint32_t flags, uintptr_t target, int32_t minimum, ++ uint32_t flags, uintptr_t target, uint32_t fop_flags, + ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks, + void *data); + void +diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c +index 
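
A note on the ec-common.h hunk just above: the EC_MINIMUM_* values stop being negative sentinels (-1/-2/-3) and become positive codes confined to the low byte of a single uint32_t, leaving bit 8 and above free for behaviour flags such as EC_FOP_NO_PROPAGATE_ERROR; ec_fop_data_allocate() then separates the two halves with EC_FOP_MINIMUM() and EC_FOP_FLAGS(), which is also why the ec-types.h hunk later in this patch widens fop->minimum to uint32_t. A compilable round-trip check, with the macro values copied from the hunk and everything else scaffolding:

#include <assert.h>
#include <stdint.h>

#define EC_MINIMUM_ONE (1 << 6)
#define EC_MINIMUM_MIN (2 << 6)
#define EC_MINIMUM_ALL (3 << 6)
#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)

int
main(void)
{
    /* What ec_fix_open() now passes for its internal open fop. */
    uint32_t fop_flags = EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR;

    /* ec_fop_data_allocate() splits the packed value back apart. */
    assert(EC_FOP_MINIMUM(fop_flags) == EC_MINIMUM_ONE);
    assert(EC_FOP_FLAGS(fop_flags) == EC_FOP_NO_PROPAGATE_ERROR);

    /* The sentinels stay inside the low byte, so they can never
     * collide with the flag bits at bit 8 and above. */
    assert((EC_MINIMUM_ALL & ~255) == 0);
    return 0;
}
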
c9db701..8310d4a 100644 +--- a/xlators/cluster/ec/src/ec-dir-read.c ++++ b/xlators/cluster/ec/src/ec-dir-read.c +@@ -219,7 +219,7 @@ ec_manager_opendir(ec_fop_data_t *fop, int32_t state) + + void + ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc, + fd_t *fd, dict_t *xdata) + { + ec_cbk_t callback = {.opendir = func}; +@@ -233,7 +233,7 @@ ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_opendir, ++ target, fop_flags, ec_wind_opendir, + ec_manager_opendir, callback, data); + if (fop == NULL) { + goto out; +@@ -515,7 +515,7 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state) + + void + ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.readdir = func}; +@@ -529,7 +529,7 @@ ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_readdir, ++ target, fop_flags, ec_wind_readdir, + ec_manager_readdir, callback, data); + if (fop == NULL) { + goto out; +@@ -585,7 +585,7 @@ ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.readdirp = func}; +@@ -599,7 +599,7 @@ ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_readdirp, ec_manager_readdir, callback, data); + if (fop == NULL) { + goto out; +diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c +index e24667f..0b8ee21 100644 +--- a/xlators/cluster/ec/src/ec-dir-write.c ++++ b/xlators/cluster/ec/src/ec-dir-write.c +@@ -262,7 +262,7 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state) + + void + ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) + { + ec_cbk_t callback = {.create = func}; +@@ -275,7 +275,7 @@ ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags, + ec_wind_create, ec_manager_create, callback, + data); + if (fop == NULL) { +@@ -432,9 +432,9 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_link(call_frame_t *frame, xlator_t 
*this, uintptr_t target, int32_t minimum, +- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) ++ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) + { + ec_cbk_t callback = {.link = func}; + ec_fop_data_t *fop = NULL; +@@ -446,7 +446,7 @@ ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags, + ec_wind_link, ec_manager_link, callback, data); + if (fop == NULL) { + goto out; +@@ -613,9 +613,9 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode, +- mode_t umask, dict_t *xdata) ++ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, mode_t umask, dict_t *xdata) + { + ec_cbk_t callback = {.mkdir = func}; + ec_fop_data_t *fop = NULL; +@@ -627,7 +627,7 @@ ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags, + ec_wind_mkdir, ec_manager_mkdir, callback, data); + if (fop == NULL) { + goto out; +@@ -815,9 +815,9 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev, +- mode_t umask, dict_t *xdata) ++ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) + { + ec_cbk_t callback = {.mknod = func}; + ec_fop_data_t *fop = NULL; +@@ -829,7 +829,7 @@ ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags, + ec_wind_mknod, ec_manager_mknod, callback, data); + if (fop == NULL) { + goto out; +@@ -975,7 +975,7 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state) + + void + ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_rename_cbk_t func, void *data, loc_t *oldloc, ++ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) + { + ec_cbk_t callback = {.rename = func}; +@@ -988,7 +988,7 @@ ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags, + ec_wind_rename, ec_manager_rename, callback, + data); + if (fop == NULL) { +@@ 
-1125,9 +1125,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags, +- dict_t *xdata) ++ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc, ++ int xflags, dict_t *xdata) + { + ec_cbk_t callback = {.rmdir = func}; + ec_fop_data_t *fop = NULL; +@@ -1139,7 +1139,7 @@ ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags, + ec_wind_rmdir, ec_manager_rmdir, callback, data); + if (fop == NULL) { + goto out; +@@ -1281,7 +1281,7 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state) + + void + ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_symlink_cbk_t func, void *data, ++ uint32_t fop_flags, fop_symlink_cbk_t func, void *data, + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) + { + ec_cbk_t callback = {.symlink = func}; +@@ -1294,9 +1294,9 @@ ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, minimum, +- ec_wind_symlink, ec_manager_symlink, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target, ++ fop_flags, ec_wind_symlink, ec_manager_symlink, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1435,7 +1435,7 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state) + + void + ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc, + int xflags, dict_t *xdata) + { + ec_cbk_t callback = {.unlink = func}; +@@ -1448,7 +1448,7 @@ ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags, + ec_wind_unlink, ec_manager_unlink, callback, + data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h +index 2abef0d..07edf8a 100644 +--- a/xlators/cluster/ec/src/ec-fops.h ++++ b/xlators/cluster/ec/src/ec-fops.h +@@ -18,233 +18,237 @@ + + void + ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc, + int32_t mask, dict_t *xdata); + + void + ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_create_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata); + + void + ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_entrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data, + const 
char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); + + void + ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fentrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data, + const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata); + + void +-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata); ++ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata); + + void +-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync, +- dict_t *xdata); ++ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd, ++ int32_t datasync, dict_t *xdata); + + void + ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, + int32_t datasync, dict_t *xdata); + + void + ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc, + const char *name, dict_t *xdata); + + void + ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, + const char *name, dict_t *xdata); + + void +-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial, +- dict_t *xdata); ++ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc, ++ int32_t partial, dict_t *xdata); + + void +-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial, +- dict_t *xdata); ++ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd, ++ int32_t partial, dict_t *xdata); + + void + ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + + void + ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + + void +-ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_link_cbk_t func, void *data, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata); ++ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata); + + void +-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t 
minimum, ++ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags, + fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata); + + void + ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc, + dict_t *xdata); + + void +-ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mkdir_cbk_t func, void *data, loc_t *loc, mode_t mode, +- mode_t umask, dict_t *xdata); ++ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, mode_t umask, dict_t *xdata); + + void +-ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_mknod_cbk_t func, void *data, loc_t *loc, mode_t mode, dev_t rdev, +- mode_t umask, dict_t *xdata); ++ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc, ++ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata); + + void +-ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd, +- dict_t *xdata); ++ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc, ++ int32_t flags, fd_t *fd, dict_t *xdata); + + void + ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_opendir_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc, + fd_t *fd, dict_t *xdata); + + void + ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata); + + void + ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readdirp_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd, + size_t size, off_t offset, dict_t *xdata); + + void + ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc, + size_t size, dict_t *xdata); + + void +-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset, +- uint32_t flags, dict_t *xdata); ++ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd, ++ size_t size, off_t offset, uint32_t flags, dict_t *xdata); + + void + ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_removexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data, + loc_t *loc, const char *name, dict_t *xdata); + + void + ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fremovexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data, + fd_t *fd, const char *name, dict_t *xdata); + + void + ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, 
fop_rename_cbk_t func, void *data, loc_t *oldloc, ++ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc, + loc_t *newloc, dict_t *xdata); + + void +-ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_rmdir_cbk_t func, void *data, loc_t *loc, int xflags, +- dict_t *xdata); ++ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc, ++ int xflags, dict_t *xdata); + + void + ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + + void + ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata); + + void + ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata); + + void + ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata); + + void +-ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata); ++ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc, ++ dict_t *xdata); + + void +-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata); ++ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata); + + void + ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc, + dict_t *xdata); + + void + ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_symlink_cbk_t func, void *data, ++ uint32_t fop_flags, fop_symlink_cbk_t func, void *data, + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata); + + void + ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata); + + void + ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd, + off_t offset, size_t len, dict_t *xdata); + + void + ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc, + off_t offset, dict_t *xdata); + + void + ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, 
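
Most of the churn in ec-fops.h and the per-fop files is this one mechanical rename: every wrapper's int32_t minimum parameter becomes uint32_t fop_flags, and the value is forwarded untouched into ec_fop_data_allocate(). Call sites that passed a bare EC_MINIMUM_* value keep working unchanged; only internal callers such as ec_fix_open() OR extra flag bits on top. A hypothetical stub (ec_flush_stub and its output format are illustrations, not part of the patch) showing both call styles:

#include <stdint.h>
#include <stdio.h>

#define EC_MINIMUM_ALL (3 << 6)
#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
#define EC_FOP_MINIMUM(_flags) ((_flags)&255)

/* Before: void ec_flush(..., int32_t minimum, ...);
 * After:  void ec_flush(..., uint32_t fop_flags, ...); */
static void
ec_flush_stub(uint32_t fop_flags)
{
    printf("minimum sentinel = %u, no-propagate = %s\n",
           (unsigned)EC_FOP_MINIMUM(fop_flags),
           (fop_flags & EC_FOP_NO_PROPAGATE_ERROR) ? "yes" : "no");
}

int
main(void)
{
    ec_flush_stub(EC_MINIMUM_ALL); /* an ordinary caller, shape unchanged */
    ec_flush_stub(EC_MINIMUM_ALL | EC_FOP_NO_PROPAGATE_ERROR);
    return 0;
}
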
fop_ftruncate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd, + off_t offset, dict_t *xdata); + + void + ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_unlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc, + int xflags, dict_t *xdata); + + void + ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata); + + void + ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + + void + ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + + void +-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata); ++ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd, ++ off_t offset, gf_seek_what_t what, dict_t *xdata); + + void +-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata); ++ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op, ++ dict_t *xdata); + + #endif /* __EC_FOPS_H__ */ +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index 175e88a..acc16b5 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -151,8 +151,9 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_flush_cbk_t func, void *data, fd_t *fd, dict_t *xdata) ++ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata) + { + ec_cbk_t callback = {.flush = func}; + ec_fop_data_t *fop = NULL; +@@ -164,7 +165,7 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, + ec_wind_flush, ec_manager_flush, callback, data); + if (fop == NULL) { + goto out; +@@ -366,9 +367,9 @@ ec_manager_fsync(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fsync_cbk_t func, void *data, fd_t *fd, int32_t datasync, +- dict_t *xdata) ++ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd, ++ int32_t datasync, dict_t *xdata) + { + ec_cbk_t callback = {.fsync = func}; + 
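
The EC_FOP_NO_PROPAGATE_ERROR bit gets its meaning from the ec_resume_parent() rework earlier in this patch: a completed child fop normally hands its own error code to the parent it resumes, but a fop flagged as non-propagating (the internal open/opendir issued by ec_fix_open()) always resumes the parent with error 0, so a failed reopen can no longer fail the user-visible operation. A conceptual sketch with simplified stand-in structures:

#include <assert.h>
#include <stdint.h>

#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)

struct fop {
    struct fop *parent;
    uint32_t fop_flags;
    int32_t error;
    int32_t resumed_error; /* what ec_resume() would receive */
};

static void
resume_parent(struct fop *fop)
{
    int32_t error = 0;

    if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0)
        error = fop->error; /* normal fops still propagate failures */
    if (fop->parent != NULL)
        fop->parent->resumed_error = error;
}

int
main(void)
{
    struct fop parent = {0};
    struct fop open_fop = {.parent = &parent,
                           .fop_flags = EC_FOP_NO_PROPAGATE_ERROR,
                           .error = 5 /* e.g. EIO from a brick */};

    resume_parent(&open_fop);
    assert(parent.resumed_error == 0); /* reopen failure swallowed */
    return 0;
}
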
ec_fop_data_t *fop = NULL; +@@ -380,7 +381,7 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, + ec_wind_fsync, ec_manager_fsync, callback, data); + if (fop == NULL) { + goto out; +@@ -553,7 +554,7 @@ ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state) + + void + ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd, + int32_t datasync, dict_t *xdata) + { + ec_cbk_t callback = {.fsyncdir = func}; +@@ -566,9 +567,9 @@ ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, minimum, +- ec_wind_fsyncdir, ec_manager_fsyncdir, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target, ++ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -848,7 +849,7 @@ ec_manager_lookup(ec_fop_data_t *fop, int32_t state) + + void + ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_lookup_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc, + dict_t *xdata) + { + ec_cbk_t callback = {.lookup = func}; +@@ -862,7 +863,7 @@ ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_lookup, ++ target, fop_flags, ec_wind_lookup, + ec_manager_lookup, callback, data); + if (fop == NULL) { + goto out; +@@ -1033,7 +1034,7 @@ ec_manager_statfs(ec_fop_data_t *fop, int32_t state) + + void + ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_statfs_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc, + dict_t *xdata) + { + ec_cbk_t callback = {.statfs = func}; +@@ -1047,7 +1048,7 @@ ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_statfs, ++ target, fop_flags, ec_wind_statfs, + ec_manager_statfs, callback, data); + if (fop == NULL) { + goto out; +@@ -1270,7 +1271,7 @@ ec_manager_xattrop(ec_fop_data_t *fop, int32_t state) + + void + ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_xattrop_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { + ec_cbk_t callback = {.xattrop = func}; +@@ -1283,9 +1284,9 @@ ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, minimum, +- ec_wind_xattrop, ec_manager_xattrop, callback, +- data); ++ fop = 
ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target, ++ fop_flags, ec_wind_xattrop, ec_manager_xattrop, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1343,7 +1344,7 @@ ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fxattrop_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { + ec_cbk_t callback = {.fxattrop = func}; +@@ -1356,9 +1357,9 @@ ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, minimum, +- ec_wind_fxattrop, ec_manager_xattrop, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target, ++ fop_flags, ec_wind_fxattrop, ec_manager_xattrop, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1507,8 +1508,9 @@ ec_manager_ipc(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_ipc_cbk_t func, void *data, int32_t op, dict_t *xdata) ++ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op, ++ dict_t *xdata) + { + ec_cbk_t callback = {.ipc = func}; + ec_fop_data_t *fop = NULL; +@@ -1520,7 +1522,7 @@ ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags, + ec_wind_ipc, ec_manager_ipc, callback, data); + if (fop == NULL) { + goto out; +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 1ca12c1..3aa04fb 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -367,16 +367,16 @@ ec_heal_data_block(ec_heal_t *heal) + /* FOP: fheal */ + + void +-ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fheal_cbk_t func, void *data, fd_t *fd, int32_t partial, +- dict_t *xdata) ++ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd, ++ int32_t partial, dict_t *xdata) + { + ec_fd_t *ctx = ec_fd_get(fd, this); + + if (ctx != NULL) { + gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags, + ctx->open); +- ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial, ++ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial, + xdata); + } + } +@@ -1975,7 +1975,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + /*Takes lock */ + void + ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_heal_cbk_t func, ec_heal_t *heal) ++ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal) + { + ec_cbk_t callback = {.heal = func}; + ec_fop_data_t *fop = NULL; +@@ -1986,7 +1986,7 @@ ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target, + VALIDATE_OR_GOTO(this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum, ++ fop = 
ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags, + NULL, ec_manager_heal_block, callback, heal); + if (fop == NULL) + goto out; +@@ -2761,9 +2761,9 @@ ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop) + } + + void +-ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_heal_cbk_t func, void *data, loc_t *loc, int32_t partial, +- dict_t *xdata) ++ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc, ++ int32_t partial, dict_t *xdata) + { + ec_cbk_t callback = {.heal = func}; + ec_fop_data_t *fop = NULL; +@@ -2779,7 +2779,7 @@ ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + + if (frame && frame->local) + goto fail; +- fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags, + NULL, NULL, callback, data); + + err = ENOMEM; +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index 55e5934..f87a94a 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -135,7 +135,7 @@ ec_manager_access(ec_fop_data_t *fop, int32_t state) + + void + ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_access_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc, + int32_t mask, dict_t *xdata) + { + ec_cbk_t callback = {.access = func}; +@@ -149,7 +149,7 @@ ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_access, ++ target, fop_flags, ec_wind_access, + ec_manager_access, callback, data); + if (fop == NULL) { + goto out; +@@ -446,7 +446,7 @@ out: + + void + ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_getxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc, + const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.getxattr = func}; +@@ -468,7 +468,7 @@ ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + } + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_getxattr, ec_manager_getxattr, callback, data); + if (fop == NULL) { + goto out; +@@ -588,7 +588,7 @@ ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd, + const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.fgetxattr = func}; +@@ -602,7 +602,7 @@ ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_fgetxattr, ec_manager_getxattr, callback, data); + if (fop == NULL) { + goto out; +@@ -869,9 +869,9 @@ ec_manager_open(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_open(call_frame_t *frame, xlator_t *this, 
uintptr_t target, int32_t minimum, +- fop_open_cbk_t func, void *data, loc_t *loc, int32_t flags, fd_t *fd, +- dict_t *xdata) ++ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc, ++ int32_t flags, fd_t *fd, dict_t *xdata) + { + ec_cbk_t callback = {.open = func}; + ec_fop_data_t *fop = NULL; +@@ -884,7 +884,7 @@ ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_open, ec_manager_open, ++ target, fop_flags, ec_wind_open, ec_manager_open, + callback, data); + if (fop == NULL) { + goto out; +@@ -1071,7 +1071,7 @@ ec_manager_readlink(ec_fop_data_t *fop, int32_t state) + + void + ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_readlink_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc, + size_t size, dict_t *xdata) + { + ec_cbk_t callback = {.readlink = func}; +@@ -1085,7 +1085,7 @@ ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate( +- frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, minimum, ++ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags, + ec_wind_readlink, ec_manager_readlink, callback, data); + if (fop == NULL) { + goto out; +@@ -1417,9 +1417,9 @@ ec_manager_readv(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_readv_cbk_t func, void *data, fd_t *fd, size_t size, off_t offset, +- uint32_t flags, dict_t *xdata) ++ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd, ++ size_t size, off_t offset, uint32_t flags, dict_t *xdata) + { + ec_cbk_t callback = {.readv = func}; + ec_fop_data_t *fop = NULL; +@@ -1432,8 +1432,8 @@ ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_readv, ec_manager_readv, +- callback, data); ++ target, fop_flags, ec_wind_readv, ++ ec_manager_readv, callback, data); + if (fop == NULL) { + goto out; + } +@@ -1637,9 +1637,9 @@ ec_manager_seek(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_seek_cbk_t func, void *data, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) ++ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd, ++ off_t offset, gf_seek_what_t what, dict_t *xdata) + { + ec_cbk_t callback = {.seek = func}; + ec_fop_data_t *fop = NULL; +@@ -1652,7 +1652,7 @@ ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_seek, ec_manager_seek, ++ target, fop_flags, ec_wind_seek, ec_manager_seek, + callback, data); + if (fop == NULL) { + goto out; +@@ -1855,8 +1855,9 @@ ec_manager_stat(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_stat(call_frame_t *frame, 
xlator_t *this, uintptr_t target, int32_t minimum, +- fop_stat_cbk_t func, void *data, loc_t *loc, dict_t *xdata) ++ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc, ++ dict_t *xdata) + { + ec_cbk_t callback = {.stat = func}; + ec_fop_data_t *fop = NULL; +@@ -1869,7 +1870,7 @@ ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_stat, ec_manager_stat, ++ target, fop_flags, ec_wind_stat, ec_manager_stat, + callback, data); + if (fop == NULL) { + goto out; +@@ -1965,8 +1966,9 @@ ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + } + + void +-ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, +- fop_fstat_cbk_t func, void *data, fd_t *fd, dict_t *xdata) ++ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, ++ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd, ++ dict_t *xdata) + { + ec_cbk_t callback = {.fstat = func}; + ec_fop_data_t *fop = NULL; +@@ -1979,8 +1981,8 @@ ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED, +- target, minimum, ec_wind_fstat, ec_manager_stat, +- callback, data); ++ target, fop_flags, ec_wind_fstat, ++ ec_manager_stat, callback, data); + if (fop == NULL) { + goto out; + } +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index e7b34e6..a903664 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -281,7 +281,7 @@ ec_manager_xattr(ec_fop_data_t *fop, int32_t state) + + void + ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_removexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data, + loc_t *loc, const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.removexattr = func}; +@@ -295,7 +295,7 @@ ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target, +- minimum, ec_wind_removexattr, ec_manager_xattr, ++ fop_flags, ec_wind_removexattr, ec_manager_xattr, + callback, data); + if (fop == NULL) { + goto out; +@@ -361,7 +361,7 @@ ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fremovexattr_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data, + fd_t *fd, const char *name, dict_t *xdata) + { + ec_cbk_t callback = {.fremovexattr = func}; +@@ -375,8 +375,8 @@ ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target, +- minimum, ec_wind_fremovexattr, ec_manager_xattr, +- callback, data); ++ fop_flags, ec_wind_fremovexattr, ++ ec_manager_xattr, callback, data); + if (fop == NULL) { + goto out; + } +@@ -550,7 +550,7 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state) + + void + ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, 
fop_setattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) + { + ec_cbk_t callback = {.setattr = func}; +@@ -563,9 +563,9 @@ ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, minimum, +- ec_wind_setattr, ec_manager_setattr, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target, ++ fop_flags, ec_wind_setattr, ec_manager_setattr, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -627,7 +627,7 @@ ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) + { + ec_cbk_t callback = {.fsetattr = func}; +@@ -640,9 +640,9 @@ ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, minimum, +- ec_wind_fsetattr, ec_manager_setattr, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target, ++ fop_flags, ec_wind_fsetattr, ec_manager_setattr, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -707,7 +707,7 @@ ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_setxattr_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) + { + ec_cbk_t callback = {.setxattr = func}; +@@ -720,9 +720,9 @@ ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, minimum, +- ec_wind_setxattr, ec_manager_xattr, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target, ++ fop_flags, ec_wind_setxattr, ec_manager_xattr, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -825,7 +825,7 @@ ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata) + { + ec_cbk_t callback = {.fsetxattr = func}; +@@ -839,7 +839,7 @@ ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target, +- minimum, ec_wind_fsetxattr, ec_manager_xattr, ++ fop_flags, ec_wind_fsetxattr, ec_manager_xattr, + callback, data); + if (fop == NULL) { + goto out; +@@ -1035,7 +1035,7 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) + + void + ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fallocate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, 
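
The new ec_succeed_all() helper (defined in the ec-common.c hunk near the top of this patch and used by the discard manager just below) replaces the old shortcut of copying fop->mask into fop->good: it marks exactly the bricks still selected for the fop (fop->remaining) as successful answers and fixes up the expected-answer count, so the bookkeeping stays consistent with what ec_child_select() computed. A simplified stand-alone version, with a hand-rolled popcount so the sketch stays self-contained:

#include <assert.h>
#include <stdint.h>

struct fop {
    uintptr_t remaining; /* bricks the fop would still be sent to */
    uintptr_t good;      /* bricks counted as successful answers */
    int expected;
};

static int
popcount(uintptr_t v)
{
    int n = 0;
    for (; v != 0; v &= v - 1)
        n++;
    return n;
}

static void
succeed_all(struct fop *fop)
{
    fop->expected = popcount(fop->remaining);
    /* Simulate a successful execution on all selected bricks. */
    fop->good = fop->remaining;
    fop->remaining = 0;
}

int
main(void)
{
    struct fop fop = {.remaining = 0x2F}; /* brick 4 was deselected */

    succeed_all(&fop);
    assert(fop.good == 0x2F && fop.remaining == 0 && fop.expected == 5);
    return 0;
}
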
fop_fallocate_cbk_t func, void *data, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) + { + ec_cbk_t callback = {.fallocate = func}; +@@ -1049,8 +1049,8 @@ ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target, +- minimum, ec_wind_fallocate, ec_manager_fallocate, +- callback, data); ++ fop_flags, ec_wind_fallocate, ++ ec_manager_fallocate, callback, data); + if (fop == NULL) { + goto out; + } +@@ -1209,8 +1209,8 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state) + ec_dispatch_all(fop); + return EC_STATE_DELAYED_START; + } else { +- /*Assume discard to have succeeded on mask*/ +- fop->good = fop->mask; ++ /* Assume discard to have succeeded on all bricks */ ++ ec_succeed_all(fop); + } + + /* Fall through */ +@@ -1289,7 +1289,7 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state) + + void + ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_discard_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd, + off_t offset, size_t len, dict_t *xdata) + { + ec_cbk_t callback = {.discard = func}; +@@ -1302,9 +1302,9 @@ ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, minimum, +- ec_wind_discard, ec_manager_discard, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target, ++ fop_flags, ec_wind_discard, ec_manager_discard, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1530,7 +1530,7 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state) + + void + ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_truncate_cbk_t func, void *data, loc_t *loc, ++ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc, + off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.truncate = func}; +@@ -1543,9 +1543,9 @@ ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, minimum, +- ec_wind_truncate, ec_manager_truncate, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target, ++ fop_flags, ec_wind_truncate, ec_manager_truncate, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1604,7 +1604,7 @@ ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_ftruncate_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd, + off_t offset, dict_t *xdata) + { + ec_cbk_t callback = {.ftruncate = func}; +@@ -1618,8 +1618,8 @@ ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target, +- minimum, ec_wind_ftruncate, ec_manager_truncate, +- callback, data); ++ fop_flags, ec_wind_ftruncate, ++ ec_manager_truncate, callback, data); + if (fop == NULL) { + goto out; + } +@@ -2262,7 +2262,7 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state) + + void + ec_writev(call_frame_t 
*frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_writev_cbk_t func, void *data, fd_t *fd, ++ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) + { +@@ -2276,7 +2276,7 @@ ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags, + ec_wind_writev, ec_manager_writev, callback, + data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c +index f978af0..ffcac07 100644 +--- a/xlators/cluster/ec/src/ec-locks.c ++++ b/xlators/cluster/ec/src/ec-locks.c +@@ -275,7 +275,7 @@ ec_manager_entrylk(ec_fop_data_t *fop, int32_t state) + + void + ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_entrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) + { +@@ -288,9 +288,9 @@ ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, minimum, +- ec_wind_entrylk, ec_manager_entrylk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target, ++ fop_flags, ec_wind_entrylk, ec_manager_entrylk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -403,7 +403,7 @@ ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, +- int32_t minimum, fop_fentrylk_cbk_t func, void *data, ++ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data, + const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) + { +@@ -416,9 +416,9 @@ ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, minimum, +- ec_wind_fentrylk, ec_manager_entrylk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target, ++ fop_flags, ec_wind_fentrylk, ec_manager_entrylk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -650,7 +650,7 @@ ec_manager_inodelk(ec_fop_data_t *fop, int32_t state) + + void + ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func, + void *data, const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +@@ -664,9 +664,9 @@ ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, minimum, +- ec_wind_inodelk, ec_manager_inodelk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target, ++ fop_flags, ec_wind_inodelk, ec_manager_inodelk, ++ callback, 
data); + if (fop == NULL) { + goto out; + } +@@ -782,7 +782,7 @@ ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx) + + void + ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +- uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, ++ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func, + void *data, const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +@@ -796,9 +796,9 @@ ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, minimum, +- ec_wind_finodelk, ec_manager_inodelk, callback, +- data); ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target, ++ fop_flags, ec_wind_finodelk, ec_manager_inodelk, ++ callback, data); + if (fop == NULL) { + goto out; + } +@@ -1032,7 +1032,7 @@ ec_manager_lk(ec_fop_data_t *fop, int32_t state) + } + + void +-ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, ++ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags, + fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +@@ -1045,7 +1045,7 @@ ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, int32_t minimum, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, minimum, ++ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags, + ec_wind_lk, ec_manager_lk, callback, data); + if (fop == NULL) { + goto out; +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 6ae4a2b..1c295c0 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -308,9 +308,9 @@ struct _ec_fop_data { + int32_t id; /* ID of the file operation */ + int32_t refs; + int32_t state; +- int32_t minimum; /* Minimum number of successful +- operation required to conclude a +- fop as successful */ ++ uint32_t minimum; /* Minimum number of successful ++ operation required to conclude a ++ fop as successful */ + int32_t expected; + int32_t winds; + int32_t jobs; +@@ -325,11 +325,12 @@ struct _ec_fop_data { + ec_cbk_data_t *answer; /* accepted answer */ + int32_t lock_count; + int32_t locked; ++ gf_lock_t lock; + ec_lock_link_t locks[2]; + int32_t first_lock; +- gf_lock_t lock; + +- uint32_t flags; ++ uint32_t fop_flags; /* Flags passed by the caller. */ ++ uint32_t flags; /* Internal flags. 
*/ + uint32_t first; + uintptr_t mask; + uintptr_t healing; /*Dispatch is done but call is successful only +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 13ffeb9..3c8013e 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -797,11 +797,12 @@ ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ uint32_t fop_flags = EC_MINIMUM_ALL; ++ + if (cmd == ENTRYLK_UNLOCK) +- minimum = EC_MINIMUM_ONE; +- ec_entrylk(frame, this, -1, minimum, default_entrylk_cbk, NULL, volume, loc, +- basename, cmd, type, xdata); ++ fop_flags = EC_MINIMUM_ONE; ++ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume, ++ loc, basename, cmd, type, xdata); + + return 0; + } +@@ -811,10 +812,11 @@ ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ uint32_t fop_flags = EC_MINIMUM_ALL; ++ + if (cmd == ENTRYLK_UNLOCK) +- minimum = EC_MINIMUM_ONE; +- ec_fentrylk(frame, this, -1, minimum, default_fentrylk_cbk, NULL, volume, ++ fop_flags = EC_MINIMUM_ONE; ++ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume, + fd, basename, cmd, type, xdata); + + return 0; +@@ -905,7 +907,7 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + { + int error = 0; + ec_t *ec = this->private; +- int32_t minimum = EC_MINIMUM_ONE; ++ int32_t fop_flags = EC_MINIMUM_ONE; + + if (name && strcmp(name, EC_XATTR_HEAL) != 0) { + EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out); +@@ -920,11 +922,11 @@ ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + + if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) || + XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) { +- minimum = EC_MINIMUM_ALL; ++ fop_flags = EC_MINIMUM_ALL; + } + +- ec_getxattr(frame, this, -1, minimum, default_getxattr_cbk, NULL, loc, name, +- xdata); ++ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc, ++ name, xdata); + + return 0; + out: +@@ -954,11 +956,12 @@ int32_t + ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ int32_t fop_flags = EC_MINIMUM_ALL; ++ + if (flock->l_type == F_UNLCK) +- minimum = EC_MINIMUM_ONE; ++ fop_flags = EC_MINIMUM_ONE; + +- ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum, ++ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags, + default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata); + + return 0; +@@ -968,10 +971,11 @@ int32_t + ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ int32_t fop_flags = EC_MINIMUM_ALL; ++ + if (flock->l_type == F_UNLCK) +- minimum = EC_MINIMUM_ONE; +- ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum, ++ fop_flags = EC_MINIMUM_ONE; ++ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags, + default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata); + + return 0; +@@ -991,10 +995,11 @@ int32_t + ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +- int32_t minimum = EC_MINIMUM_ALL; ++ int32_t fop_flags 
= EC_MINIMUM_ALL; ++ + if (flock->l_type == F_UNLCK) +- minimum = EC_MINIMUM_ONE; +- ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd, flock, ++ fop_flags = EC_MINIMUM_ONE; ++ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock, + xdata); + + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0117-spec-Remove-thin-arbiter-package.patch b/SOURCES/0117-spec-Remove-thin-arbiter-package.patch new file mode 100644 index 0000000..47fbffc --- /dev/null +++ b/SOURCES/0117-spec-Remove-thin-arbiter-package.patch @@ -0,0 +1,184 @@ +From 70842c77735a655a053ed4a7cb77fec01028355a Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Mon, 22 Apr 2019 12:48:13 +0530 +Subject: [PATCH 117/124] spec: Remove thin-arbiter package + +Thin-arbiter is not supported in downstream. Updated the +code to avoid RPMdiff warnings. Marked thin-arbiter +test cases as bad to avoid nightly runs from reporting +expected failures. + +Label: DOWNSTREAM ONLY + +BUG: 1698436 +Change-Id: Ic36bccdfe1c7039fb7e5ce078a8b64cf71056970 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168406 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + configure.ac | 2 -- + extras/systemd/Makefile.am | 6 ++--- + glusterfs.spec.in | 39 +++++---------------------------- + tests/basic/afr/ta-check-locks.t | 2 ++ + tests/basic/afr/ta-read.t | 2 ++ + tests/basic/afr/ta-shd.t | 2 ++ + tests/basic/afr/ta-write-on-bad-brick.t | 2 ++ + xlators/features/Makefile.am | 2 +- + 8 files changed, 18 insertions(+), 39 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 521671b..3065077 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -116,8 +116,6 @@ AC_CONFIG_FILES([Makefile + xlators/features/Makefile + xlators/features/arbiter/Makefile + xlators/features/arbiter/src/Makefile +- xlators/features/thin-arbiter/Makefile +- xlators/features/thin-arbiter/src/Makefile + xlators/features/changelog/Makefile + xlators/features/changelog/src/Makefile + xlators/features/changelog/lib/Makefile +diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am +index 61446a9..b849775 100644 +--- a/extras/systemd/Makefile.am ++++ b/extras/systemd/Makefile.am +@@ -1,8 +1,8 @@ +-CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service +-EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in ++CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service ++EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in + + if USE_SYSTEMD +-systemd_DATA = gluster-ta-volume.service ++systemd_DATA = + endif + + if WITH_SERVER +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ba095b7..bf72a55 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -682,18 +682,6 @@ is in user space and easily manageable. + This package provides the glusterfs server daemon. + %endif + +-%package thin-arbiter +-Summary: GlusterFS thin-arbiter module +-Requires: %{name}%{?_isa} = %{version}-%{release} +-Requires: %{name}-server%{?_isa} = %{version}-%{release} +- +-%description thin-arbiter +-This package provides a tie-breaker functionality to GlusterFS +-replicate volume. It includes translators required to provide the +-functionality, and also few other scripts required for getting the setup done. +- +-This package provides the glusterfs thin-arbiter translator. 
+- + %package client-xlators + Summary: GlusterFS client-side translators + Requires: %{name}-libs%{?_isa} = %{version}-%{release} +@@ -1045,14 +1033,6 @@ fi + exit 0 + %endif + +-%preun thin-arbiter +-if [ $1 -eq 0 ]; then +- if [ -f %glusterta_svcfile ]; then +- %service_stop gluster-ta-volume +- %systemd_preun gluster-ta-volume +- fi +-fi +- + ##----------------------------------------------------------------------------- + ## All %%postun should be placed here and keep them sorted + ## +@@ -1188,6 +1168,12 @@ exit 0 + %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* + %endif + ++%exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh ++ ++%if ( 0%{?_without_server:1} ) ++%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol ++%endif ++ + %files api + %exclude %{_libdir}/*.so + # libgfapi files +@@ -1263,19 +1249,6 @@ exit 0 + %{_bindir}/fusermount-glusterfs + %endif + +-%files thin-arbiter +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +-%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features +- %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so +-%dir %{_datadir}/glusterfs/scripts +- %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh +-%config %{_sysconfdir}/glusterfs/thin-arbiter.vol +- +-%if ( 0%{?_with_systemd:1} ) +-%{_unitdir}/gluster-ta-volume.service +-%endif +- +- + %if ( 0%{!?_without_georeplication:1} ) + %files geo-replication + %config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep +diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t +index c0102c3..c51aa39 100644 +--- a/tests/basic/afr/ta-check-locks.t ++++ b/tests/basic/afr/ta-check-locks.t +@@ -66,3 +66,5 @@ TEST ta_start_brick_process brick0 + EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t +index f2b3c38..1b36dba 100644 +--- a/tests/basic/afr/ta-read.t ++++ b/tests/basic/afr/ta-read.t +@@ -58,3 +58,5 @@ TEST [ -z $TA_PID ] + # Read must now succeed. 
+ TEST cat $M0/FILE + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t +index bb2e58b..4b1ea85 100644 +--- a/tests/basic/afr/ta-shd.t ++++ b/tests/basic/afr/ta-shd.t +@@ -47,3 +47,5 @@ TEST ta_start_mount_process $M0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 + EXPECT "Hello" cat $M0/a.txt + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t +index 18cb65b..77cbf5f 100644 +--- a/tests/basic/afr/ta-write-on-bad-brick.t ++++ b/tests/basic/afr/ta-write-on-bad-brick.t +@@ -49,3 +49,5 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate + TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am +index 545c02b..537c148 100644 +--- a/xlators/features/Makefile.am ++++ b/xlators/features/Makefile.am +@@ -4,7 +4,7 @@ endif + + SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ + compress changelog gfid-access snapview-client snapview-server trash \ +- shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ ++ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) \ + utime changetimerecorder + + CLEANFILES = +-- +1.8.3.1 + diff --git a/SOURCES/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch b/SOURCES/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch new file mode 100644 index 0000000..328116b --- /dev/null +++ b/SOURCES/0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch @@ -0,0 +1,31 @@ +From 24c2430e3cd629665851fdb2921d754e3ecef3b4 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Wed, 24 Apr 2019 07:47:32 +0530 +Subject: [PATCH 118/124] tests: mark thin-arbiter test ta.t as bad + +As thin-arbiter isn't packaged and tested at RHGS 3.5 + +Label: DOWNSTREAM ONLY + +BUG: 1698436 +Change-Id: Ideab570f58fc0b2daecb2be4ed4b6740362d0b35 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168613 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tests/basic/afr/ta.t | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t +index 05d4843..cbd1b56 100644 +--- a/tests/basic/afr/ta.t ++++ b/tests/basic/afr/ta.t +@@ -52,3 +52,5 @@ TEST ! ls $B0/brick0/c.txt + TEST ! 
ls $B0/brick1/c.txt + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=0000000 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=0000000 +-- +1.8.3.1 + diff --git a/SOURCES/0119-glusterd-provide-a-way-to-detach-failed-node.patch b/SOURCES/0119-glusterd-provide-a-way-to-detach-failed-node.patch new file mode 100644 index 0000000..fa9198b --- /dev/null +++ b/SOURCES/0119-glusterd-provide-a-way-to-detach-failed-node.patch @@ -0,0 +1,53 @@ +From a325e7b3bbe5c1f67b999f375b83d2e2f1b2c1c6 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 9 Apr 2019 13:56:24 +0530 +Subject: [PATCH 119/124] glusterd: provide a way to detach failed node + +When a gluster node in a trusted storage pool has failed +due to hardware issues, the volume delete operation fails +saying "Not all peers are up" and peer detach for the failed +node fails saying "Brick(s) with peer <peer_ip> exists +in cluster". + +The idea here is to use either the replace-brick or remove-brick +command to remove all the bricks hosted by the failed node and +then re-attempt the peer detach. This change adds this +hint to the peer detach error message. + +> upstream patch : https://review.gluster.org/22534 + +>fixes: bz#1697866 +>Change-Id: I0c58887479d31db603ad8d6535ea9d547880ccc8 +>Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1696334 +Change-Id: I0c58887479d31db603ad8d6535ea9d547880ccc8 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168614 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 6147995..af8a8a4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -4134,8 +4134,11 @@ set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr, + + case GF_DEPROBE_BRICK_EXIST: + snprintf(errstr, len, +- "Brick(s) with the peer " +- "%s exist in cluster", ++ "Peer %s hosts one or more bricks. If the peer is in " ++ "not recoverable state then use either replace-brick " ++ "or remove-brick command with force to remove all " ++ "bricks from the peer and attempt the peer detach " ++ "again.", + hostname); + break; + +-- +1.8.3.1 + diff --git a/SOURCES/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch b/SOURCES/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch new file mode 100644 index 0000000..58b86d7 --- /dev/null +++ b/SOURCES/0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch @@ -0,0 +1,62 @@ +From c429d3c63601e6ea15af76aa684c30bbeb746467 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 23 Apr 2019 13:03:28 +0530 +Subject: [PATCH 120/124] glusterd/shd: Keep a ref on volinfo until attach rpc + execute cbk + +When svc attach executes for multiplexing a daemon, we have to keep +a ref on the volinfo until it finishes execution,
because if the attach +is an async call, a parallel volume delete can free the +volinfo. + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22606/ + +>Change-Id: Ibc02b89557baaed2f63db63d7fb1a7480444ae0d +>fixes: bz#1702185 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: Ibc02b89557baaed2f63db63d7fb1a7480444ae0d +BUG: 1702240 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168616 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 +++ + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 19eca9f..a9eab42 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -452,8 +452,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + } + + if (shd->attached) { ++ glusterd_volinfo_ref(volinfo); ++ /* Unref will happen from glusterd_svc_attach_cbk */ + ret = glusterd_attach_svc(svc, volinfo, flags); + if (ret) { ++ glusterd_volinfo_unref(volinfo); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to attach shd svc(volume=%s) to pid=%d. Starting" + "a new process", +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 02945b1..f7be394 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -695,6 +695,10 @@ out: + if (flag) { + GF_FREE(flag); + } ++ ++ if (volinfo) ++ glusterd_volinfo_unref(volinfo); ++ + GF_ATOMIC_DEC(conf->blockers); + STACK_DESTROY(frame->root); + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch b/SOURCES/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch new file mode 100644 index 0000000..00aa910 --- /dev/null +++ b/SOURCES/0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch @@ -0,0 +1,42 @@ +From e4209dfb27faeca5544a09474ac524546e5d11e0 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Wed, 24 Apr 2019 18:14:33 +0530 +Subject: [PATCH 121/124] spec: glusterfs-devel for client-build should not + depend on server + +Found that libgfdb.pc was included in the client package. +It was earlier removed from the glusterfs-devel client package +as a part of: +40eb62a8872ce061416e899fb6c0784b6253ab16 + +It made it back into downstream when tier was introduced again. +Removing it in this patch.
+ +label: DOWNSTREAM ONLY + +Change-Id: I5fd5f5b0a6b06c677f8ea3693eb0392af51abaf1 +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168670 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Milind Changire <mchangir@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index bf72a55..d20b062 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1210,7 +1210,7 @@ exit 0 + %exclude %{_libdir}/pkgconfig/libgfchangelog.pc + %exclude %{_libdir}/libgfchangelog.so + %if ( 0%{!?_without_tiering:1} ) +-%{_libdir}/pkgconfig/libgfdb.pc ++%exclude %{_libdir}/pkgconfig/libgfdb.pc + %endif + %else + %{_libdir}/pkgconfig/libgfchangelog.pc +-- +1.8.3.1 + diff --git a/SOURCES/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch b/SOURCES/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch new file mode 100644 index 0000000..5d256e2 --- /dev/null +++ b/SOURCES/0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch @@ -0,0 +1,312 @@ +From 2f07d12f902e371d8cb8c76007d558e3a727b56a Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Tue, 9 Apr 2019 18:23:05 +0530 +Subject: [PATCH 122/124] posix/ctime: Fix stat(time attributes) inconsistency + during readdirp + +Problem: + Creation of a tar file on a gluster volume throws the warning +'file changed as we read it' + +Cause: + During readdirp, for a few of the files whose inode is not +present, the time attributes were served from the backend. This caused +the ctime of a few files to be different before and after +the readdir done by tar. + +Solution: + If the ctime feature is enabled and the inode is not present, don't +serve the time attributes from the backend file; serve them from the xattr.
+ +Backport of: + > Patch: https://review.gluster.org/22540 + > fixes: bz#1698078 + > Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +BUG: 1699709 +Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168687 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/ctime/ctime-readdir.c | 29 +++++++++++++++++ + tests/basic/ctime/ctime-readdir.t | 50 ++++++++++++++++++++++++++++++ + xlators/storage/posix/src/posix-helpers.c | 29 +++++++++++------ + xlators/storage/posix/src/posix-metadata.c | 41 ++++++++++++++---------- + 4 files changed, 123 insertions(+), 26 deletions(-) + create mode 100644 tests/basic/ctime/ctime-readdir.c + create mode 100644 tests/basic/ctime/ctime-readdir.t + +diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c +new file mode 100644 +index 0000000..8760db2 +--- /dev/null ++++ b/tests/basic/ctime/ctime-readdir.c +@@ -0,0 +1,29 @@ ++#include <stdio.h> ++#include <dirent.h> ++#include <string.h> ++#include <assert.h> ++ ++int ++main(int argc, char **argv) ++{ ++ DIR *dir = NULL; ++ struct dirent *entry = NULL; ++ int ret = 0; ++ char *path = NULL; ++ ++ assert(argc == 2); ++ path = argv[1]; ++ ++ dir = opendir(path); ++ if (!dir) { ++ printf("opendir(%s) failed.\n", path); ++ return -1; ++ } ++ ++ while ((entry = readdir(dir)) != NULL) { ++ } ++ if (dir) ++ closedir(dir); ++ ++ return ret; ++} +diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t +new file mode 100644 +index 0000000..4564fc1 +--- /dev/null ++++ b/tests/basic/ctime/ctime-readdir.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3}; ++TEST $CLI volume set $V0 performance.stat-prefetch on ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume start $V0; ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++TEST mkdir $M0/dir0 ++TEST "echo hello_world > $M0/dir0/FILE" ++ ++ctime1=$(stat -c %Z $M0/dir0/FILE) ++echo "Mount change time: $ctime1" ++ ++sleep 2 ++ ++#Write to back end directly to modify ctime of backend file ++TEST "echo write_from_backend >> $B0/brick1/dir0/FILE" ++TEST "echo write_from_backend >> $B0/brick2/dir0/FILE" ++TEST "echo write_from_backend >> $B0/brick3/dir0/FILE" ++echo "Backend change time" ++echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)" ++echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)" ++echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)" ++ ++#Stop and start to hit the case of no inode for readdir ++TEST umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++TEST build_tester $(dirname $0)/ctime-readdir.c ++ ++#Do readdir ++TEST ./$(dirname $0)/ctime-readdir $M0/dir0 ++ ++EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE ++echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)" ++ ++cleanup_tester $(dirname $0)/ctime-readdir ++ ++cleanup; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 193afc5..37e33a9 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -832,17 +832,26 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, + + iatt_from_stat(&stbuf, &lstatbuf); + +- if (inode && priv->ctime) { +- if (!inode_locked) { +- ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); ++ if (priv->ctime) { ++ if (inode) { ++ if (!inode_locked) { ++ ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); ++ } else { ++ ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, ++ "posix get mdata failed on gfid: %s", ++ uuid_utoa(inode->gfid)); ++ goto out; ++ } + } else { +- ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); +- } +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, +- "posix get mdata failed on gfid: %s", +- uuid_utoa(inode->gfid)); +- goto out; ++ ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, ++ "posix get mdata failed on path: %s", path); ++ goto out; ++ } + } + } + +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 0ea9099..7ff5225 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -79,6 +79,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + fd_based_fop = _gf_true; + } + if (!(fd_based_fop || real_path_arg)) { ++ GF_VALIDATE_OR_GOTO(this->name, inode, out); + MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); + if (!real_path) { + uuid_utoa_r(inode->gfid, gfid_str); +@@ -114,14 +115,14 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + key, + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), +- uuid_utoa(inode->gfid)); ++ inode ? 
uuid_utoa(inode->gfid) : "null"); + } else { + gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed" + " on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), +- uuid_utoa(inode->gfid), key); ++ inode ? uuid_utoa(inode->gfid) : "null", key); + } + op_ret = -1; + goto out; +@@ -148,7 +149,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, + "getxattr failed on " + " on %s gfid: %s key: %s ", + real_path ? real_path : (real_path_arg ? real_path_arg : "null"), +- uuid_utoa(inode->gfid), key); ++ inode ? uuid_utoa(inode->gfid) : "null", key); + goto out; + } + +@@ -233,9 +234,14 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + int ret = -1; + int op_errno = 0; + +- GF_VALIDATE_OR_GOTO(this->name, inode, out); ++ /* Handle readdirp: inode might be null, time attributes should be served ++ * from xattr not from backend's file attributes */ ++ if (inode) { ++ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); ++ } else { ++ ret = -1; ++ } + +- ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); + if (ret == -1 || !mdata) { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { +@@ -251,7 +257,9 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + * is hit when in-memory status is lost due to brick + * down scenario + */ +- __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ if (inode) { ++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ } + } else { + /* Failed to get mdata from disk, xattr missing. + * This happens on two cases. +@@ -278,7 +286,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + */ + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ", +- real_path ? real_path : "null", uuid_utoa(inode->gfid), ++ real_path ? real_path : "null", ++ inode ? uuid_utoa(inode->gfid) : "null", + GF_XATTR_MDATA_KEY); + GF_FREE(mdata); + ret = 0; +@@ -297,6 +306,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + stbuf->ia_atime = mdata->atime.tv_sec; + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } ++ /* Not set in inode context, hence free mdata */ ++ if (!inode) { ++ GF_FREE(mdata); ++ } + + out: + return ret; +@@ -416,6 +429,11 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + } + } + ++ if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) { ++ ret = 0; ++ goto unlock; ++ } ++ + /* Earlier, mdata was updated only if the existing time is less + * than the time to be updated. This would fail the scenarios + * where mtime can be set to any time using the syscall. 
Hence +@@ -486,7 +504,6 @@ out: + stbuf->ia_atime_nsec = mdata->atime.tv_nsec; + } + +- + return ret; + } + +@@ -604,10 +621,6 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); +- if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { +- goto out; +- } +- + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : %s gfid:%s", real_path, +@@ -643,9 +656,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + + if (inode && priv->ctime) { + (void)posix_get_parent_mdata_flag(frame->root->flags, &flag); +- if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { +- goto out; +- } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); +@@ -655,7 +665,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + uuid_utoa(inode->gfid)); + } + } +-out: + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0123-ctime-Fix-log-repeated-logging-during-open.patch b/SOURCES/0123-ctime-Fix-log-repeated-logging-during-open.patch new file mode 100644 index 0000000..b51c436 --- /dev/null +++ b/SOURCES/0123-ctime-Fix-log-repeated-logging-during-open.patch @@ -0,0 +1,79 @@ +From 03c0395a1ead769167046713a99662bc5c5233fa Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Fri, 19 Apr 2019 11:34:37 +0530 +Subject: [PATCH 123/124] ctime: Fix log repeated logging during open + +The log "posix set mdata failed, No ctime" logged repeatedly +after the fix [1]. Those could be internal fops. This patch +fixes the same. + +[1] https://review.gluster.org/22540 + +Backport of: + > Patch: https://review.gluster.org/22591 + > fixes: bz#1701457 + > Change-Id: I42799a90b976982cedb0ca11fa224d555eb05650 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +BUG: 1699709 +Change-Id: I42799a90b976982cedb0ca11fa224d555eb05650 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168688 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/storage/posix/src/posix-metadata.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 7ff5225..e96f222 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -429,11 +429,6 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + } + } + +- if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) { +- ret = 0; +- goto unlock; +- } +- + /* Earlier, mdata was updated only if the existing time is less + * than the time to be updated. This would fail the scenarios + * where mtime can be set to any time using the syscall. Hence +@@ -621,13 +616,9 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); +- if (frame->root->ctime.tv_sec == 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, +- "posix set mdata failed, No ctime : %s gfid:%s", real_path, +- inode ? 
uuid_utoa(inode->gfid) : "No inode"); ++ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } +- + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); +@@ -656,6 +647,9 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + + if (inode && priv->ctime) { + (void)posix_get_parent_mdata_flag(frame->root->flags, &flag); ++ if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { ++ goto out; ++ } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, + &frame->root->ctime, stbuf, &flag, + _gf_false); +@@ -665,6 +659,7 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + uuid_utoa(inode->gfid)); + } + } ++out: + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0124-spec-remove-duplicate-references-to-files.patch b/SOURCES/0124-spec-remove-duplicate-references-to-files.patch new file mode 100644 index 0000000..b8a8c8b --- /dev/null +++ b/SOURCES/0124-spec-remove-duplicate-references-to-files.patch @@ -0,0 +1,39 @@ +From e7112224eebaa91c529397a944e94254e482f48f Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Thu, 25 Apr 2019 13:07:19 +0530 +Subject: [PATCH 124/124] spec: remove duplicate references to files + +Label: DOWNSTREAM ONLY + +Change-Id: I446fbeadaaab96aa215f4fd784d951f825486008 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168735 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index d20b062..86a1527 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1214,7 +1214,6 @@ exit 0 + %endif + %else + %{_libdir}/pkgconfig/libgfchangelog.pc +-%{_libdir}/libgfchangelog.so + %if ( 0%{!?_without_tiering:1} ) + %{_libdir}/pkgconfig/libgfdb.pc + %endif +@@ -1469,7 +1468,6 @@ exit 0 + %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run + %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid +-%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub +-- +1.8.3.1 + diff --git a/SOURCES/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch b/SOURCES/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch new file mode 100644 index 0000000..c1c49a3 --- /dev/null +++ b/SOURCES/0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch @@ -0,0 +1,75 @@ +From 0cd08d9e89f5ee86d5f4f90f0ca5c07bd290636c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Fri, 26 Apr 2019 22:28:53 +0530 +Subject: [PATCH 125/141] glusterd: define dumpops in the xlator_api of + glusterd + +Problem: statedump is not capturing information related to glusterd + +Solution: statedump is not capturing glusterd info because +trav->dumpops is null in gf_proc_dump_single_xlator_info () +where trav is the glusterd xlator object. trav->dumpops is null +because we missed defining dumpops in the xlator_api of glusterd. +Defining dumpops in the xlator_api of glusterd fixes the issue.
+ +> fixes: bz#1703629 +> Change-Id: If85429ecb1ef580aced8d5b88d09fc15258bfc4c +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22640/ + +BUG: 1703753 +Change-Id: If85429ecb1ef580aced8d5b88d09fc15258bfc4c +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169207 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/bugs/glusterd/optimized-basic-testcases.t | 13 +++++++++++++ + xlators/mgmt/glusterd/src/glusterd.c | 1 + + 2 files changed, 14 insertions(+) + +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index dd98a65..d700b5e 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -32,6 +32,16 @@ function get_brick_host_uuid() + echo $host_uuid_list | awk '{print $1}' + } + ++function generate_statedump_and_check_for_glusterd_info { ++ pid=`pidof glusterd` ++ #remove old stale statedumps ++ cleanup_statedump $pid ++ kill -USR1 $pid ++ #Wait till the statedump is generated ++ sleep 1 ++ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.") ++ cat $statedumpdir/$fname | grep "xlator.glusterd.priv" | wc -l ++} + + cleanup; + +@@ -279,4 +289,7 @@ mkdir -p /xyz/var/lib/glusterd/abc + TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc + EXPECT 'Created' volinfo_field "test" 'Status'; + ++EXPECT "1" generate_statedump_and_check_for_glusterd_info ++ ++cleanup_statedump `pidof glusterd` + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index d4ab630..c0973cb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -2231,6 +2231,7 @@ xlator_api_t xlator_api = { + .fini = fini, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ ++ .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, +-- +1.8.3.1 + diff --git a/SOURCES/0126-cluster-dht-refactor-dht-lookup-functions.patch b/SOURCES/0126-cluster-dht-refactor-dht-lookup-functions.patch new file mode 100644 index 0000000..25c43a0 --- /dev/null +++ b/SOURCES/0126-cluster-dht-refactor-dht-lookup-functions.patch @@ -0,0 +1,663 @@ +From 6565749c95e90f360a994bde1416cffd22cd8ce9 Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Mon, 25 Mar 2019 15:56:56 +0530 +Subject: [PATCH 126/141] cluster/dht: refactor dht lookup functions + +Part 1: refactor the dht_lookup_dir_cbk +and dht_selfheal_directory functions. 
+Added a simple dht selfheal directory test + +upstream: https://review.gluster.org/#/c/glusterfs/+/22407/ +> Change-Id: I1410c26359e3c14b396adbe751937a52bd2fcff9 +> updates: bz#1590385 + +Change-Id: Idd0a7df7122d634c371ecf30c0dbb94dc6063416 +BUG: 1703897 +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169037 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Susant Palai <spalai@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/distribute/dir-heal.t | 145 +++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 178 +++++++++++++++------------------ + xlators/cluster/dht/src/dht-selfheal.c | 65 +++++++----- + 3 files changed, 264 insertions(+), 124 deletions(-) + create mode 100644 tests/basic/distribute/dir-heal.t + +diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t +new file mode 100644 +index 0000000..851f765 +--- /dev/null ++++ b/tests/basic/distribute/dir-heal.t +@@ -0,0 +1,145 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../nfs.rc ++. $(dirname $0)/../../common-utils.rc ++ ++# Test 1 overview: ++# ---------------- ++# ++# 1. Kill one brick of the volume. ++# 2. Create directories and change directory properties. ++# 3. Bring up the brick and access the directory ++# 4. Check the permissions and xattrs on the backend ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3} ++TEST $CLI volume start $V0 ++ ++# We want the lookup to reach DHT ++TEST $CLI volume set $V0 performance.stat-prefetch off ++ ++# Mount using FUSE , kill a brick and create directories ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ls $M0/ ++cd $M0 ++ ++TEST kill_brick $V0 $H0 $B0/$V0-1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++TEST mkdir dir{1..4} ++ ++# No change for dir1 ++# Change permissions for dir2 ++# Set xattr on dir3 ++# Change permissions and set xattr on dir4 ++ ++TEST chmod 777 $M0/dir2 ++ ++TEST setfattr -n "user.test" -v "test" $M0/dir3 ++ ++TEST chmod 777 $M0/dir4 ++TEST setfattr -n "user.test" -v "test" $M0/dir4 ++ ++ ++# Start all bricks ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++#$CLI volume status ++ ++# It takes a while for the client to reconnect to the brick ++sleep 5 ++ ++stat $M0/dir* > /dev/null ++ ++# Check that directories have been created on the brick that was killed ++ ++TEST ls $B0/$V0-1/dir1 ++ ++TEST ls $B0/$V0-1/dir2 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir2 ++ ++TEST ls $B0/$V0-1/dir3 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3 ++ ++ ++TEST ls $B0/$V0-1/dir4 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir4 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4 ++ ++ ++TEST rm -rf $M0/* ++ ++cd ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++ ++# Test 2 overview: ++# ---------------- ++# 1. Create directories with all bricks up. ++# 2. Kill a brick and change directory properties and set user xattr. ++# 2. Bring up the brick and access the directory ++# 3. 
Check the permissions and xattrs on the backend ++ ++ ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ls $M0/ ++cd $M0 ++TEST mkdir dir{1..4} ++ ++TEST kill_brick $V0 $H0 $B0/$V0-1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++# No change for dir1 ++# Change permissions for dir2 ++# Set xattr on dir3 ++# Change permissions and set xattr on dir4 ++ ++TEST chmod 777 $M0/dir2 ++ ++TEST setfattr -n "user.test" -v "test" $M0/dir3 ++ ++TEST chmod 777 $M0/dir4 ++TEST setfattr -n "user.test" -v "test" $M0/dir4 ++ ++ ++# Start all bricks ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1 ++ ++#$CLI volume status ++ ++# It takes a while for the client to reconnect to the brick ++sleep 5 ++ ++stat $M0/dir* > /dev/null ++ ++# Check directories on the brick that was killed ++ ++TEST ls $B0/$V0-1/dir2 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir2 ++ ++TEST ls $B0/$V0-1/dir3 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3 ++ ++ ++TEST ls $B0/$V0-1/dir4 ++EXPECT "777" stat -c "%a" $B0/$V0-1/dir4 ++EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4 ++cd ++ ++ ++# Cleanup ++cleanup ++ +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 2a68193..d3e900c 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -801,9 +801,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + call_frame_t *xattr_frame = NULL; + gf_boolean_t vol_down = _gf_false; + +- this = frame->this; +- + GF_VALIDATE_OR_GOTO("dht", frame, out); ++ this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame->local, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); +@@ -812,6 +811,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, + conf = this->private; + layout = local->selfheal.layout; + local->mds_heal_fresh_lookup = mark_during_fresh_lookup; ++ + gf_uuid_unparse(local->gfid, gfid_local); + + /* Code to update hashed subvol consider as a mds subvol +@@ -1240,6 +1240,31 @@ out: + } + + int ++dht_needs_selfheal(call_frame_t *frame, xlator_t *this) ++{ ++ dht_local_t *local = NULL; ++ dht_layout_t *layout = NULL; ++ int needs_selfheal = 0; ++ int ret = 0; ++ ++ local = frame->local; ++ layout = local->layout; ++ ++ if (local->need_attrheal || local->need_xattr_heal || ++ local->need_selfheal) { ++ needs_selfheal = 1; ++ } ++ ++ ret = dht_layout_normalize(this, &local->loc, layout); ++ ++ if (ret != 0) { ++ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path); ++ needs_selfheal = 1; ++ } ++ return needs_selfheal; ++} ++ ++int + dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *postparent) +@@ -1256,8 +1281,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + char gfid_node[GF_UUID_BUF_SIZE] = {0}; + int32_t mds_xattr_val[1] = {0}; +- call_frame_t *copy = NULL; +- dht_local_t *copy_local = NULL; + + GF_VALIDATE_OR_GOTO("dht", frame, out); + GF_VALIDATE_OR_GOTO("dht", this, out); +@@ -1270,7 +1293,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + conf = this->private; + + layout = local->layout; ++ gf_msg_debug(this->name, op_errno, ++ "%s: lookup on %s returned 
with op_ret = %d, op_errno = %d", ++ local->loc.path, prev->name, op_ret, op_errno); + ++ /* The first successful lookup*/ + if (!op_ret && gf_uuid_is_null(local->gfid)) { + memcpy(local->gfid, stbuf->ia_gfid, 16); + } +@@ -1298,13 +1325,10 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + if (op_ret == -1) { + local->op_errno = op_errno; +- gf_msg_debug(this->name, op_errno, +- "%s: lookup on %s returned error", local->loc.path, +- prev->name); + + /* The GFID is missing on this subvol. Force a heal. */ + if (op_errno == ENODATA) { +- local->need_selfheal = 1; ++ local->need_lookup_everywhere = 1; + } + goto unlock; + } +@@ -1312,12 +1336,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + is_dir = check_is_dir(inode, stbuf, xattr); + if (!is_dir) { + gf_msg_debug(this->name, 0, +- "lookup of %s on %s returned non" +- "dir 0%o" ++ "%s: lookup on %s returned non dir 0%o" + "calling lookup_everywhere", + local->loc.path, prev->name, stbuf->ia_type); + +- local->need_selfheal = 1; ++ local->need_lookup_everywhere = 1; + goto unlock; + } + +@@ -1328,14 +1351,8 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_aggregate_xattr(local->xattr, xattr); + } + +- if (dict_get(xattr, conf->mds_xattr_key)) { +- local->mds_subvol = prev; +- local->mds_stbuf.ia_gid = stbuf->ia_gid; +- local->mds_stbuf.ia_uid = stbuf->ia_uid; +- local->mds_stbuf.ia_prot = stbuf->ia_prot; +- } +- + if (local->stbuf.ia_type != IA_INVAL) { ++ /* This is not the first subvol to respond */ + if (!__is_root_gfid(stbuf->ia_gfid) && + ((local->stbuf.ia_gid != stbuf->ia_gid) || + (local->stbuf.ia_uid != stbuf->ia_uid) || +@@ -1348,65 +1365,64 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (local->inode == NULL) + local->inode = inode_ref(inode); + ++ /* This could be a problem */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); + + if (!dict_get(xattr, conf->mds_xattr_key)) { + gf_msg_debug(this->name, 0, +- "Internal xattr %s is not present " +- " on path %s gfid is %s ", +- conf->mds_xattr_key, local->loc.path, gfid_local); ++ "%s: mds xattr %s is not present " ++ "on %s(gfid = %s)", ++ local->loc.path, conf->mds_xattr_key, prev->name, ++ gfid_local); + goto unlock; +- } else { +- /* Save mds subvol on inode ctx */ +- ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, +- DHT_MSG_SET_INODE_CTX_FAILED, +- "Failed to set hashed subvol for %s vol is %s", +- local->loc.path, prev->name); +- } ++ } ++ ++ local->mds_subvol = prev; ++ local->mds_stbuf = *stbuf; ++ ++ /* Save mds subvol on inode ctx */ ++ ++ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, ++ "%s: Failed to set mds (%s)", local->loc.path, prev->name); + } + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); + if ((check_mds < 0) && !errst) { + local->mds_xattr = dict_ref(xattr); + gf_msg_debug(this->name, 0, +- "Value of %s is not zero on hashed subvol " +- "so xattr needs to be heal on non hashed" +- " path is %s and vol name is %s " +- " gfid is %s", +- conf->mds_xattr_key, local->loc.path, prev->name, ++ "%s: %s is not zero on %s. Xattrs need to be healed." 
++ "(gfid = %s)", ++ local->loc.path, conf->mds_xattr_key, prev->name, + gfid_local); + local->need_xattr_heal = 1; +- local->mds_subvol = prev; + } + } ++ + unlock: + UNLOCK(&frame->lock); + + this_call_cnt = dht_frame_return(frame); + + if (is_last_call(this_call_cnt)) { ++ /* If the mds subvol is not set correctly*/ ++ if (!__is_root_gfid(local->gfid) && ++ (!dict_get(local->xattr, conf->mds_xattr_key))) { ++ local->need_selfheal = 1; ++ } ++ + /* No need to call xattr heal code if volume count is 1 + */ +- if (conf->subvolume_cnt == 1) ++ if (conf->subvolume_cnt == 1) { + local->need_xattr_heal = 0; +- +- /* Code to update all extended attributed from hashed subvol +- to local->xattr +- */ +- if (local->need_xattr_heal && (local->mds_xattr)) { +- dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, +- NULL, NULL); +- dict_unref(local->mds_xattr); +- local->mds_xattr = NULL; + } + +- if (local->need_selfheal) { +- local->need_selfheal = 0; ++ if (local->need_selfheal || local->need_lookup_everywhere) { + /* Set the gfid-req so posix will set the GFID*/ + if (!gf_uuid_is_null(local->gfid)) { ++ /* Ok, this should _never_ happen */ + ret = dict_set_static_bin(local->xattr_req, "gfid-req", + local->gfid, 16); + } else { +@@ -1414,73 +1430,36 @@ unlock: + ret = dict_set_static_bin(local->xattr_req, "gfid-req", + local->gfid_req, 16); + } ++ } ++ ++ if (local->need_lookup_everywhere) { ++ local->need_lookup_everywhere = 0; + dht_lookup_everywhere(frame, this, &local->loc); + return 0; + } + + if (local->op_ret == 0) { +- ret = dht_layout_normalize(this, &local->loc, layout); +- +- if (ret != 0) { +- gf_msg_debug(this->name, 0, "fixing assignment on %s", +- local->loc.path); ++ if (dht_needs_selfheal(frame, this)) { + goto selfheal; + } + + dht_layout_set(this, local->inode, layout); +- if (!dict_get(local->xattr, conf->mds_xattr_key) || +- local->need_xattr_heal) +- goto selfheal; +- } +- +- if (local->inode) { +- dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); +- } +- +- if (local->loc.parent) { +- dht_inode_ctx_time_update(local->loc.parent, this, +- &local->postparent, 1); +- } +- +- if (local->need_attrheal) { +- local->need_attrheal = 0; +- if (!__is_root_gfid(inode->gfid)) { +- local->stbuf.ia_gid = local->mds_stbuf.ia_gid; +- local->stbuf.ia_uid = local->mds_stbuf.ia_uid; +- local->stbuf.ia_prot = local->mds_stbuf.ia_prot; ++ if (local->inode) { ++ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); + } +- copy = create_frame(this, this->ctx->pool); +- if (copy) { +- copy_local = dht_local_init(copy, &local->loc, NULL, 0); +- if (!copy_local) { +- DHT_STACK_DESTROY(copy); +- goto skip_attr_heal; +- } +- copy_local->stbuf = local->stbuf; +- gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid); +- copy_local->mds_stbuf = local->mds_stbuf; +- copy_local->mds_subvol = local->mds_subvol; +- copy->local = copy_local; +- FRAME_SU_DO(copy, dht_local_t); +- ret = synctask_new(this->ctx->env, dht_dir_attr_heal, +- dht_dir_attr_heal_done, copy, copy); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, +- DHT_MSG_DIR_ATTR_HEAL_FAILED, +- "Synctask creation failed to heal attr " +- "for path %s gfid %s ", +- local->loc.path, local->gfid); +- DHT_STACK_DESTROY(copy); +- } ++ ++ if (local->loc.parent) { ++ dht_inode_ctx_time_update(local->loc.parent, this, ++ &local->postparent, 1); + } + } + +- skip_attr_heal: + DHT_STRIP_PHASE1_FLAGS(&local->stbuf); + dht_set_fixed_dir_stat(&local->postparent); + /* Delete mds xattr at the time of STACK 
UNWIND */ + if (local->xattr) + GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); ++ + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, local->xattr, + &local->postparent); +@@ -5444,9 +5423,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, + DHT_MSG_HASHED_SUBVOL_GET_FAILED, +- "Failed to get mds subvol for path %s" +- "gfid is %s ", +- loc->path, gfid_local); ++ "%s: Failed to get mds subvol. (gfid is %s)", loc->path, ++ gfid_local); + } + (*op_errno) = ENOENT; + goto err; +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index bd1b7ea..5420fca 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -1033,18 +1033,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + int missing_attr = 0; + int i = 0, ret = -1; + dht_local_t *local = NULL; ++ dht_conf_t *conf = NULL; + xlator_t *this = NULL; + int cnt = 0; + + local = frame->local; + this = frame->this; ++ conf = this->private; ++ ++ /* We need to heal the attrs if: ++ * 1. Any directories were missing - the newly created dirs will need ++ * to have the correct attrs set ++ * 2. An existing dir does not have the correct permissions -they may ++ * have been changed when a brick was down. ++ */ + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err == -1) + missing_attr++; + } + +- if (missing_attr == 0) { ++ if ((missing_attr == 0) && (local->need_attrheal == 0)) { + if (!local->heal_layout) { + gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ", + loc->path, uuid_utoa(loc->gfid)); +@@ -1062,19 +1071,12 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, + return 0; + } + +- local->call_cnt = missing_attr; +- cnt = layout->cnt; ++ cnt = local->call_cnt = conf->subvolume_cnt; + + for (i = 0; i < cnt; i++) { +- if (layout->list[i].err == -1) { +- gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s", +- loc->path, layout->list[i].xlator->name, +- uuid_utoa(loc->gfid)); +- +- STACK_WIND( +- frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, +- layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL); +- } ++ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, ++ layout->list[i].xlator->fops->setattr, loc, stbuf, valid, ++ NULL); + } + + return 0; +@@ -1492,6 +1494,9 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + } + + if (missing_dirs == 0) { ++ /* We don't need to create any directories. 
Proceed to heal the ++ * attrs and xattrs ++ */ + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + if (local->need_xattr_heal) { + local->need_xattr_heal = 0; +@@ -1499,8 +1504,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + if (ret) + gf_msg(this->name, GF_LOG_ERROR, ret, + DHT_MSG_DIR_XATTR_HEAL_FAILED, +- "xattr heal failed for " +- "directory %s gfid %s ", ++ "%s:xattr heal failed for " ++ "directory (gfid = %s)", + local->loc.path, local->gfid); + } else { + if (!gf_uuid_is_null(local->gfid)) +@@ -1512,8 +1517,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, + + gf_msg(this->name, GF_LOG_INFO, 0, + DHT_MSG_DIR_XATTR_HEAL_FAILED, +- "Failed to set mds xattr " +- "for directory %s gfid %s ", ++ "%s: Failed to set mds xattr " ++ "for directory (gfid = %s)", + local->loc.path, local->gfid); + } + } +@@ -2085,10 +2090,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) + { + dht_local_t *local = NULL; ++ xlator_t *this = NULL; + uint32_t down = 0; + uint32_t misc = 0; + int ret = 0; +- xlator_t *this = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + char gfid[GF_UUID_BUF_SIZE] = {0}; + inode_t *linked_inode = NULL, *inode = NULL; +@@ -2099,6 +2104,11 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(this, layout); + ++ if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { ++ /*Use the one in the mds_stbuf*/ ++ local->stbuf = local->mds_stbuf; ++ } ++ + if (!__is_root_gfid(local->stbuf.ia_gfid)) { + gf_uuid_unparse(local->stbuf.ia_gfid, gfid); + gf_uuid_unparse(loc->parent->gfid, pgfid); +@@ -2118,6 +2128,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + inode_unref(inode); + } + ++ if (local->need_xattr_heal && (local->mds_xattr)) { ++ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, ++ NULL, NULL); ++ dict_unref(local->mds_xattr); ++ local->mds_xattr = NULL; ++ } ++ + dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, + &local->selfheal.missing_cnt, &local->selfheal.down, +@@ -2128,18 +2145,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + + if (down) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, +- "Directory selfheal failed: %d subvolumes down." +- "Not fixing. path = %s, gfid = %s", +- down, loc->path, gfid); ++ "%s: Directory selfheal failed: %d subvolumes down." ++ "Not fixing. gfid = %s", ++ loc->path, down, gfid); + ret = 0; + goto sorry_no_fix; + } + + if (misc) { + gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, +- "Directory selfheal failed : %d subvolumes " +- "have unrecoverable errors. path = %s, gfid = %s", +- misc, loc->path, gfid); ++ "%s: Directory selfheal failed : %d subvolumes " ++ "have unrecoverable errors. 
gfid = %s", ++ loc->path, misc, gfid); + + ret = 0; + goto sorry_no_fix; +@@ -2369,13 +2386,13 @@ dht_dir_attr_heal(void *data) + + frame = data; + local = frame->local; +- mds_subvol = local->mds_subvol; + this = frame->this; + GF_VALIDATE_OR_GOTO("dht", this, out); + GF_VALIDATE_OR_GOTO("dht", local, out); + conf = this->private; + GF_VALIDATE_OR_GOTO("dht", conf, out); + ++ mds_subvol = local->mds_subvol; + call_cnt = conf->subvolume_cnt; + + if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) { +-- +1.8.3.1 + diff --git a/SOURCES/0127-cluster-dht-Refactor-dht-lookup-functions.patch b/SOURCES/0127-cluster-dht-Refactor-dht-lookup-functions.patch new file mode 100644 index 0000000..0d0fdb3 --- /dev/null +++ b/SOURCES/0127-cluster-dht-Refactor-dht-lookup-functions.patch @@ -0,0 +1,200 @@ +From 884ba13ee47888b5de9b6d6acaf051e895f55053 Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Wed, 10 Apr 2019 14:28:55 +0530 +Subject: [PATCH 127/141] cluster/dht: Refactor dht lookup functions + +Part 2: Modify dht_revalidate_cbk to call +dht_selfheal_directory instead of separate calls +to heal attrs and xattrs. + +upstream: https://review.gluster.org/#/c/glusterfs/+/22542/ + +> Change-Id: Id41ac6c4220c2c35484812bbfc6157fc3c86b142 +> updates: bz#1590385 + +Change-Id: Id53962306dd142efc741de838b585fa5c78f9b1f +BUG:1703897 +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169038 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Susant Palai <spalai@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 104 ++++++++++------------------------- + 1 file changed, 30 insertions(+), 74 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index d3e900c..183872f 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1365,7 +1365,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + if (local->inode == NULL) + local->inode = inode_ref(inode); + +- /* This could be a problem */ + dht_iatt_merge(this, &local->stbuf, stbuf); + dht_iatt_merge(this, &local->postparent, postparent); + +@@ -1509,8 +1508,6 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int is_dir = 0; + int is_linkfile = 0; + int follow_link = 0; +- call_frame_t *copy = NULL; +- dht_local_t *copy_local = NULL; + char gfid[GF_UUID_BUF_SIZE] = {0}; + uint32_t vol_commit_hash = 0; + xlator_t *subvol = NULL; +@@ -1538,17 +1535,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + gf_uuid_unparse(local->loc.gfid, gfid); + ++ gf_msg_debug(this->name, op_errno, ++ "%s: revalidate lookup on %s returned op_ret %d", ++ local->loc.path, prev->name, op_ret); ++ + LOCK(&frame->lock); + { + if (gf_uuid_is_null(local->gfid)) { + memcpy(local->gfid, local->loc.gfid, 16); + } + +- gf_msg_debug(this->name, op_errno, +- "revalidate lookup of %s " +- "returned with op_ret %d", +- local->loc.path, op_ret); +- + if (op_ret == -1) { + local->op_errno = op_errno; + +@@ -1580,6 +1576,8 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->loc.path); + + local->need_lookup_everywhere = 1; ++ } else if (IA_ISDIR(local->loc.inode->ia_type)) { ++ local->need_selfheal = 1; + } + } + +@@ -1638,15 +1636,16 @@ dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + (local->stbuf.ia_uid != stbuf->ia_uid) || + 
is_permission_different(&local->stbuf.ia_prot, + &stbuf->ia_prot)) { +- local->need_selfheal = 1; ++ local->need_attrheal = 1; + } + } + + if (!dict_get(xattr, conf->mds_xattr_key)) { + gf_msg_debug(this->name, 0, +- "internal xattr %s is not present" +- " on path %s gfid is %s ", +- conf->mds_xattr_key, local->loc.path, gfid); ++ "%s: internal xattr %s is not present" ++ " on subvol %s(gfid is %s)", ++ local->loc.path, conf->mds_xattr_key, prev->name, ++ gfid); + } else { + check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, + mds_xattr_val, 1, &errst); +@@ -1734,71 +1733,28 @@ unlock: + local->need_xattr_heal = 0; + + if (IA_ISDIR(local->stbuf.ia_type)) { +- /* Code to update all extended attributed from hashed +- subvol to local->xattr and call heal code to heal +- custom xattr from hashed subvol to non-hashed subvol +- */ +- if (local->need_xattr_heal && (local->mds_xattr)) { +- dht_dir_set_heal_xattr(this, local, local->xattr, +- local->mds_xattr, NULL, NULL); +- dict_unref(local->mds_xattr); +- local->mds_xattr = NULL; +- local->need_xattr_heal = 0; +- ret = dht_dir_xattr_heal(this, local); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, ret, +- DHT_MSG_DIR_XATTR_HEAL_FAILED, +- "xattr heal failed for directory %s " +- " gfid %s ", +- local->loc.path, gfid); +- } else { +- /* Call function to save hashed subvol on inode +- ctx if internal mds xattr is not present and +- all subvols are up +- */ +- if (inode && !__is_root_gfid(inode->gfid) && (!local->op_ret)) +- (void)dht_common_mark_mdsxattr(frame, NULL, 1); +- } +- } +- if (local->need_selfheal) { +- local->need_selfheal = 0; +- if (!__is_root_gfid(inode->gfid)) { +- gf_uuid_copy(local->gfid, local->mds_stbuf.ia_gfid); +- local->stbuf.ia_gid = local->mds_stbuf.ia_gid; +- local->stbuf.ia_uid = local->mds_stbuf.ia_uid; +- local->stbuf.ia_prot = local->mds_stbuf.ia_prot; +- } else { +- gf_uuid_copy(local->gfid, local->stbuf.ia_gfid); +- local->stbuf.ia_gid = local->prebuf.ia_gid; +- local->stbuf.ia_uid = local->prebuf.ia_uid; +- local->stbuf.ia_prot = local->prebuf.ia_prot; +- } ++ if (!__is_root_gfid(local->loc.inode->gfid) && ++ (!dict_get(local->xattr, conf->mds_xattr_key))) ++ local->need_selfheal = 1; + +- copy = create_frame(this, this->ctx->pool); +- if (copy) { +- copy_local = dht_local_init(copy, &local->loc, NULL, 0); +- if (!copy_local) { +- DHT_STACK_DESTROY(copy); +- goto cont; +- } +- copy_local->stbuf = local->stbuf; +- copy_local->mds_stbuf = local->mds_stbuf; +- copy_local->mds_subvol = local->mds_subvol; +- copy->local = copy_local; +- FRAME_SU_DO(copy, dht_local_t); +- ret = synctask_new(this->ctx->env, dht_dir_attr_heal, +- dht_dir_attr_heal_done, copy, copy); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, +- DHT_MSG_DIR_ATTR_HEAL_FAILED, +- "Synctask creation failed to heal attr " +- "for path %s gfid %s ", +- local->loc.path, local->gfid); +- DHT_STACK_DESTROY(copy); ++ if (dht_needs_selfheal(frame, this)) { ++ if (!__is_root_gfid(local->loc.inode->gfid)) { ++ local->stbuf.ia_gid = local->mds_stbuf.ia_gid; ++ local->stbuf.ia_uid = local->mds_stbuf.ia_uid; ++ local->stbuf.ia_prot = local->mds_stbuf.ia_prot; ++ } else { ++ local->stbuf.ia_gid = local->prebuf.ia_gid; ++ local->stbuf.ia_uid = local->prebuf.ia_uid; ++ local->stbuf.ia_prot = local->prebuf.ia_prot; + } ++ ++ layout = local->layout; ++ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk, ++ &local->loc, layout); ++ return 0; + } + } +- cont: ++ + if (local->layout_mismatch) { + /* Found layout mismatch in the directory, need to + fix 
this in the inode context */ +@@ -1814,7 +1770,7 @@ unlock: + dht_layout_unref(this, local->layout); + local->layout = NULL; + +- /* We know that current cached subvol is no more ++ /* We know that current cached subvol is no longer + valid, get the new one */ + local->cached_subvol = NULL; + if (local->xattr_req) { +-- +1.8.3.1 + diff --git a/SOURCES/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch b/SOURCES/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch new file mode 100644 index 0000000..862b828 --- /dev/null +++ b/SOURCES/0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch @@ -0,0 +1,86 @@ +From bb39abc1dab3c7b7b725f9eefe119218e94f610b Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Mon, 29 Apr 2019 18:48:36 +0530 +Subject: [PATCH 128/141] glusterd: Fix bulkvoldict thread logic in brick + multiplexing + +Problem: Currently glusterd spawn bulkvoldict in brick_mux + environment while no. of volumes are less than configured + glusterd.vol_count_per_thread + +Solution: Correct the logic to spawn bulkvoldict thread + 1) Calculate endindex only while total thread is non zero + 2) Update end index correctly to pass index for bulkvoldict + thread + +> Fixes: bz#1704252 +> Change-Id: I1def847fbdd6a605e7687bfc4e42b706bf0eb70b +> (Cherry picked from commit ac70f66c5805e10b3a1072bd467918730c0aeeb4) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22647/) + +BUG: 1704769 +Change-Id: I1def847fbdd6a605e7687bfc4e42b706bf0eb70b +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169091 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ff6102b..efa5a86 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3436,9 +3436,19 @@ glusterd_add_bulk_volumes_create_thread(void *data) + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; +- if ((count < start) || (count > end)) ++ ++ /* Skip volumes if index count is less than start ++ index to handle volume for specific thread ++ */ ++ if (count < start) + continue; + ++ /* No need to process volume if index count is greater ++ than end index ++ */ ++ if (count > end) ++ break; ++ + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); + if (ret) + goto out; +@@ -3499,9 +3509,11 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + totthread = 0; + } else { + totthread = volcnt / vol_per_thread_limit; +- endindex = volcnt % vol_per_thread_limit; +- if (endindex) +- totthread++; ++ if (totthread) { ++ endindex = volcnt % vol_per_thread_limit; ++ if (endindex) ++ totthread++; ++ } + } + + if (totthread == 0) { +@@ -3527,10 +3539,10 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + arg->this = this; + arg->voldict = dict_arr[i]; + arg->start = start; +- if (!endindex) { ++ if ((i + 1) != totthread) { + arg->end = ((i + 1) * vol_per_thread_limit); + } else { +- arg->end = (start + endindex); ++ arg->end = ((i * vol_per_thread_limit) + endindex); + } + th_ret = gf_thread_create_detached( + &th_id, glusterd_add_bulk_volumes_create_thread, arg, +-- +1.8.3.1 + diff --git 
a/SOURCES/0129-core-handle-memory-accounting-correctly.patch b/SOURCES/0129-core-handle-memory-accounting-correctly.patch new file mode 100644 index 0000000..1281d04 --- /dev/null +++ b/SOURCES/0129-core-handle-memory-accounting-correctly.patch @@ -0,0 +1,401 @@ +From f305ee93ec9dbbd679e1eb58c7c0bf8d9b5659d5 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 12 Apr 2019 13:40:59 +0200 +Subject: [PATCH 129/141] core: handle memory accounting correctly + +When a translator stops, memory accounting for that translator is not +destroyed (because there could remain memory allocated that references +it), but mutexes that coordinate updates of memory accounting were +destroyed. This caused incorrect memory accounting and even crashes in +debug mode. + +This patch also fixes some other things: + +* Reduce the number of atomic operations needed to manage memory + accounting. +* Correctly account memory when realloc() is used. +* Merge two critical sections into one. +* Cleaned the code a bit. + +Upstream patch: +> Change-Id: Id5eaee7338729b9bc52c931815ca3ff1e5a7dcc8 +> Upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22554/ +> BUG: 1659334 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: Id5eaee7338729b9bc52c931815ca3ff1e5a7dcc8 +Fixes: bz#1702270 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169325 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 2 + + libglusterfs/src/libglusterfs.sym | 1 + + libglusterfs/src/mem-pool.c | 193 ++++++++++++++++-------------------- + libglusterfs/src/xlator.c | 23 +++-- + 4 files changed, 105 insertions(+), 114 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 06152ec..8998976 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1035,6 +1035,8 @@ gf_boolean_t + loc_is_nameless(loc_t *loc); + int + xlator_mem_acct_init(xlator_t *xl, int num_types); ++void ++xlator_mem_acct_unref(struct mem_acct *mem_acct); + int + is_gf_log_command(xlator_t *trans, const char *name, char *value); + int +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index fa2025e..cf5757c 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1093,6 +1093,7 @@ xlator_foreach + xlator_foreach_depth_first + xlator_init + xlator_mem_acct_init ++xlator_mem_acct_unref + xlator_notify + xlator_option_info_list + xlator_option_init_bool +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 34cb87a..3934a78 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -35,61 +35,92 @@ gf_mem_acct_enable_set(void *data) + return; + } + +-int +-gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type, +- const char *typestr) ++static void * ++gf_mem_header_prepare(struct mem_header *header, size_t size) + { +- void *ptr = NULL; +- struct mem_header *header = NULL; ++ void *ptr; + +- if (!alloc_ptr) +- return -1; ++ header->size = size; + +- ptr = *alloc_ptr; ++ ptr = header + 1; + +- GF_ASSERT(xl != NULL); ++ /* data follows in this gap of 'size' bytes */ ++ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC; + +- GF_ASSERT(xl->mem_acct != NULL); ++ return ptr; ++} + +- GF_ASSERT(type <= xl->mem_acct->num_types); ++static void * 
++gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header, ++ size_t size, uint32_t type, const char *typestr) ++{ ++ struct mem_acct_rec *rec = NULL; ++ bool new_ref = false; + +- LOCK(&xl->mem_acct->rec[type].lock); +- { +- if (!xl->mem_acct->rec[type].typestr) +- xl->mem_acct->rec[type].typestr = typestr; +- xl->mem_acct->rec[type].size += size; +- xl->mem_acct->rec[type].num_allocs++; +- xl->mem_acct->rec[type].total_allocs++; +- xl->mem_acct->rec[type].max_size = max(xl->mem_acct->rec[type].max_size, +- xl->mem_acct->rec[type].size); +- xl->mem_acct->rec[type].max_num_allocs = max( +- xl->mem_acct->rec[type].max_num_allocs, +- xl->mem_acct->rec[type].num_allocs); +- } +- UNLOCK(&xl->mem_acct->rec[type].lock); ++ if (mem_acct != NULL) { ++ GF_ASSERT(type <= mem_acct->num_types); + +- GF_ATOMIC_INC(xl->mem_acct->refcnt); ++ rec = &mem_acct->rec[type]; ++ LOCK(&rec->lock); ++ { ++ if (!rec->typestr) { ++ rec->typestr = typestr; ++ } ++ rec->size += size; ++ new_ref = (rec->num_allocs == 0); ++ rec->num_allocs++; ++ rec->total_allocs++; ++ rec->max_size = max(rec->max_size, rec->size); ++ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs); ++ ++#ifdef DEBUG ++ list_add(&header->acct_list, &rec->obj_list); ++#endif ++ } ++ UNLOCK(&rec->lock); ++ ++ /* We only take a reference for each memory type used, not for each ++ * allocation. This minimizes the use of atomic operations. */ ++ if (new_ref) { ++ GF_ATOMIC_INC(mem_acct->refcnt); ++ } ++ } + +- header = (struct mem_header *)ptr; + header->type = type; +- header->size = size; +- header->mem_acct = xl->mem_acct; ++ header->mem_acct = mem_acct; + header->magic = GF_MEM_HEADER_MAGIC; + ++ return gf_mem_header_prepare(header, size); ++} ++ ++static void * ++gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header, ++ size_t size) ++{ ++ struct mem_acct_rec *rec = NULL; ++ ++ if (mem_acct != NULL) { ++ rec = &mem_acct->rec[header->type]; ++ LOCK(&rec->lock); ++ { ++ rec->size += size - header->size; ++ rec->total_allocs++; ++ rec->max_size = max(rec->max_size, rec->size); ++ + #ifdef DEBUG +- INIT_LIST_HEAD(&header->acct_list); +- LOCK(&xl->mem_acct->rec[type].lock); +- { +- list_add(&header->acct_list, &(xl->mem_acct->rec[type].obj_list)); +- } +- UNLOCK(&xl->mem_acct->rec[type].lock); ++ /* The old 'header' already was present in 'obj_list', but ++ * realloc() could have changed its address. We need to remove ++ * the old item from the list and add the new one. This can be ++ * done this way because list_move() doesn't use the pointers ++ * to the old location (which are not valid anymore) already ++ * present in the list, it simply overwrites them. 
*/ ++ list_move(&header->acct_list, &rec->obj_list); + #endif +- ptr += sizeof(struct mem_header); +- /* data follows in this gap of 'size' bytes */ +- *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC; ++ } ++ UNLOCK(&rec->lock); ++ } + +- *alloc_ptr = ptr; +- return 0; ++ return gf_mem_header_prepare(header, size); + } + + void * +@@ -97,7 +128,7 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr) + { + size_t tot_size = 0; + size_t req_size = 0; +- char *ptr = NULL; ++ void *ptr = NULL; + xlator_t *xl = NULL; + + if (!THIS->ctx->mem_acct_enable) +@@ -114,16 +145,15 @@ __gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr) + gf_msg_nomem("", GF_LOG_ALERT, tot_size); + return NULL; + } +- gf_mem_set_acct_info(xl, &ptr, req_size, type, typestr); + +- return (void *)ptr; ++ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr); + } + + void * + __gf_malloc(size_t size, uint32_t type, const char *typestr) + { + size_t tot_size = 0; +- char *ptr = NULL; ++ void *ptr = NULL; + xlator_t *xl = NULL; + + if (!THIS->ctx->mem_acct_enable) +@@ -138,84 +168,32 @@ __gf_malloc(size_t size, uint32_t type, const char *typestr) + gf_msg_nomem("", GF_LOG_ALERT, tot_size); + return NULL; + } +- gf_mem_set_acct_info(xl, &ptr, size, type, typestr); + +- return (void *)ptr; ++ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr); + } + + void * + __gf_realloc(void *ptr, size_t size) + { + size_t tot_size = 0; +- char *new_ptr; +- struct mem_header *old_header = NULL; +- struct mem_header *new_header = NULL; +- struct mem_header tmp_header; ++ struct mem_header *header = NULL; + + if (!THIS->ctx->mem_acct_enable) + return REALLOC(ptr, size); + + REQUIRE(NULL != ptr); + +- old_header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE); +- GF_ASSERT(old_header->magic == GF_MEM_HEADER_MAGIC); +- tmp_header = *old_header; +- +-#ifdef DEBUG +- int type = 0; +- size_t copy_size = 0; +- +- /* Making these changes for realloc is not straightforward. So +- * I am simulating realloc using calloc and free +- */ +- +- type = tmp_header.type; +- new_ptr = __gf_calloc(1, size, type, +- tmp_header.mem_acct->rec[type].typestr); +- if (new_ptr) { +- copy_size = (size > tmp_header.size) ? tmp_header.size : size; +- memcpy(new_ptr, ptr, copy_size); +- __gf_free(ptr); +- } +- +- /* This is not quite what the man page says should happen */ +- return new_ptr; +-#endif ++ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE); ++ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC); + + tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE; +- new_ptr = realloc(old_header, tot_size); +- if (!new_ptr) { ++ header = realloc(header, tot_size); ++ if (!header) { + gf_msg_nomem("", GF_LOG_ALERT, tot_size); + return NULL; + } + +- /* +- * We used to pass (char **)&ptr as the second +- * argument after the value of realloc was saved +- * in ptr, but the compiler warnings complained +- * about the casting to and forth from void ** to +- * char **. +- * TBD: it would be nice to adjust the memory accounting info here, +- * but calling gf_mem_set_acct_info here is wrong because it bumps +- * up counts as though this is a new allocation - which it's not. +- * The consequence of doing nothing here is only that the sizes will be +- * wrong, but at least the counts won't be. 
+- uint32_t type = 0; +- xlator_t *xl = NULL; +- type = header->type; +- xl = (xlator_t *) header->xlator; +- gf_mem_set_acct_info (xl, &new_ptr, size, type, NULL); +- */ +- +- new_header = (struct mem_header *)new_ptr; +- *new_header = tmp_header; +- new_header->size = size; +- +- new_ptr += sizeof(struct mem_header); +- /* data follows in this gap of 'size' bytes */ +- *(uint32_t *)(new_ptr + size) = GF_MEM_TRAILER_MAGIC; +- +- return (void *)new_ptr; ++ return gf_mem_update_acct_info(header->mem_acct, header, size); + } + + int +@@ -321,6 +299,7 @@ __gf_free(void *free_ptr) + void *ptr = NULL; + struct mem_acct *mem_acct; + struct mem_header *header = NULL; ++ bool last_ref = false; + + if (!THIS->ctx->mem_acct_enable) { + FREE(free_ptr); +@@ -352,16 +331,18 @@ __gf_free(void *free_ptr) + mem_acct->rec[header->type].num_allocs--; + /* If all the instances are freed up then ensure typestr is set + * to NULL */ +- if (!mem_acct->rec[header->type].num_allocs) ++ if (!mem_acct->rec[header->type].num_allocs) { ++ last_ref = true; + mem_acct->rec[header->type].typestr = NULL; ++ } + #ifdef DEBUG + list_del(&header->acct_list); + #endif + } + UNLOCK(&mem_acct->rec[header->type].lock); + +- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { +- FREE(mem_acct); ++ if (last_ref) { ++ xlator_mem_acct_unref(mem_acct); + } + + free: +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 5d6f8d2..022c3ed 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -736,6 +736,19 @@ xlator_mem_acct_init(xlator_t *xl, int num_types) + } + + void ++xlator_mem_acct_unref(struct mem_acct *mem_acct) ++{ ++ uint32_t i; ++ ++ if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { ++ for (i = 0; i < mem_acct->num_types; i++) { ++ LOCK_DESTROY(&(mem_acct->rec[i].lock)); ++ } ++ FREE(mem_acct); ++ } ++} ++ ++void + xlator_tree_fini(xlator_t *xl) + { + xlator_t *top = NULL; +@@ -766,7 +779,6 @@ xlator_list_destroy(xlator_list_t *list) + int + xlator_memrec_free(xlator_t *xl) + { +- uint32_t i = 0; + struct mem_acct *mem_acct = NULL; + + if (!xl) { +@@ -775,13 +787,8 @@ xlator_memrec_free(xlator_t *xl) + mem_acct = xl->mem_acct; + + if (mem_acct) { +- for (i = 0; i < mem_acct->num_types; i++) { +- LOCK_DESTROY(&(mem_acct->rec[i].lock)); +- } +- if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) { +- FREE(mem_acct); +- xl->mem_acct = NULL; +- } ++ xlator_mem_acct_unref(mem_acct); ++ xl->mem_acct = NULL; + } + + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch b/SOURCES/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch new file mode 100644 index 0000000..2bd360f --- /dev/null +++ b/SOURCES/0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch @@ -0,0 +1,117 @@ +From 01bb17a0910a638e89a44a6da4b1359123940498 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Wed, 17 Apr 2019 12:17:27 +0530 +Subject: [PATCH 130/141] tier/test: new-tier-cmds.t fails after a glusterd + restart + +Problem: new-tier-cmds.t does a restart of gluster processes and +after the restart the bricks and the tier process takes more +time than before to come online. This causes the detach start to +fail. + +Fix: Give it enough time to come online after the restart. 
+ +label: DOWNSTREAM ONLY + +Change-Id: I0f50b0bb77fe49ebd3a0292e190d0350d7994cfe +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/168130 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/tier/new-tier-cmds.t | 45 ++++++++++++++++++++++++++-------------- + tests/volume.rc | 8 +++++++ + 2 files changed, 37 insertions(+), 16 deletions(-) + +diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t +index b9c9390..92881ac 100644 +--- a/tests/basic/tier/new-tier-cmds.t ++++ b/tests/basic/tier/new-tier-cmds.t +@@ -19,14 +19,6 @@ function create_dist_tier_vol () { + TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6 + } + +-function tier_daemon_status { +- local _VAR=CLI_$1 +- local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status' +- ${!_VAR} --xml volume status $V0 \ +- | xmllint --xpath "$xpath_sel" - \ +- | sed -n '/.*<status>\([0-9]*\).*/s//\1/p' +-} +- + function detach_xml_status { + $CLI_1 volume tier $V0 detach status --xml | sed -n \ + '/.*<opErrstr>Detach tier status successful/p' | wc -l +@@ -70,7 +62,20 @@ TEST $glusterd_2; + EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; + + #after starting detach tier the detach tier status should display the status +-sleep 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_tierd_count + $CLI_1 volume status + TEST $CLI_1 volume tier $V0 detach start + +@@ -91,13 +96,21 @@ EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; + EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2 + EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2 + +-# Parsing normal output doesn't work because of line-wrap issues on our +-# regression machines, and the version of xmllint there doesn't support --xpath +-# so we can't do it that way either. In short, there's no way for us to detect +-# when we can stop waiting, so we just have to wait the maximum time every time +-# and hope any failures will show up later in the script. 
+-sleep $PROCESS_UP_TIMEOUT +-#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_tierd_count ++$CLI_1 volume status + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status + +diff --git a/tests/volume.rc b/tests/volume.rc +index 289b197..b326098 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -719,6 +719,14 @@ function get_snapd_count { + ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l + } + ++function get_tierd_count { ++ ps auxww | grep glusterfs | grep tierd.pid | grep -v grep | wc -l ++} ++ ++function get_shd_count { ++ ps auxww | grep glusterfs | grep shd.pid | grep -v grep | wc -l ++} ++ + function drop_cache() { + case $OSTYPE in + Linux) +-- +1.8.3.1 + diff --git a/SOURCES/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch b/SOURCES/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch new file mode 100644 index 0000000..6238fb1 --- /dev/null +++ b/SOURCES/0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch @@ -0,0 +1,113 @@ +From a0949929282529e0e866e074721c1bdfe3928c8c Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Thu, 11 Apr 2019 12:12:12 +0530 +Subject: [PATCH 131/141] tests/dht: Test that lookups are sent post brick up + +upstream: https://review.gluster.org/#/c/glusterfs/+/22545/ + +>Change-Id: I3556793c5e9d58cc6a08644b41dc5740fab2610b +>updates: bz#1628194 + +BUG:1704562 +Change-Id: Ie45331298902bd5268c56cb29a966d8246abfd6d +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169592 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/distribute/brick-down.t | 83 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 83 insertions(+) + create mode 100644 tests/basic/distribute/brick-down.t + +diff --git a/tests/basic/distribute/brick-down.t b/tests/basic/distribute/brick-down.t +new file mode 100644 +index 0000000..522ccc0 +--- /dev/null ++++ b/tests/basic/distribute/brick-down.t +@@ -0,0 +1,83 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../common-utils.rc ++. $(dirname $0)/../../dht.rc ++ ++# Test 1 overview: ++# ---------------- ++# Test whether lookups are sent after a brick comes up again ++# ++# 1. Create a 3 brick pure distribute volume ++# 2. Fuse mount the volume so the layout is set on the root ++# 3. 
Kill one brick and try to create a directory which hashes to that brick. ++# It should fail with EIO. ++# 4. Restart the brick that was killed. ++# 5. Do not remount the volume. Try to create the same directory as in step 3. ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3} ++TEST $CLI volume start $V0 ++ ++# We want the lookup to reach DHT ++TEST $CLI volume set $V0 performance.stat-prefetch off ++ ++# Mount using FUSE and lookup the mount so a layout is set on the brick root ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ls $M0/ ++ ++TEST mkdir $M0/level1 ++ ++# Find a dirname that will hash to the brick we are going to kill ++hashed=$V0-client-1 ++TEST dht_first_filename_with_hashsubvol "$hashed" $M0 "dir-" ++roottestdir=$fn_return_val ++ ++hashed=$V0-client-1 ++TEST dht_first_filename_with_hashsubvol "$hashed" $M0/level1 "dir-" ++level1testdir=$fn_return_val ++ ++ ++TEST kill_brick $V0 $H0 $B0/$V0-2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-2 ++ ++TEST $CLI volume status $V0 ++ ++ ++# Unmount and mount the volume again so dht has an incomplete in memory layout ++ ++umount -f $M0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++ ++ ++mkdir $M0/$roottestdir ++TEST [ $? -ne 0 ] ++ ++mkdir $M0/level1/$level1testdir ++TEST [ $? -ne 0 ] ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-2 ++ ++#$CLI volume status ++ ++# It takes a while for the client to reconnect to the brick ++sleep 5 ++ ++ ++mkdir $M0/$roottestdir ++TEST [ $? -eq 0 ] ++ ++mkdir $M0/$level1/level1testdir ++TEST [ $? -eq 0 ] ++ ++# Cleanup ++cleanup ++ ++ +-- +1.8.3.1 + diff --git a/SOURCES/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch b/SOURCES/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch new file mode 100644 index 0000000..2c7888d --- /dev/null +++ b/SOURCES/0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch @@ -0,0 +1,41 @@ +From 83d5ebd6ca68e319db86e310cf072888d0f0f1d1 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 8 May 2019 10:07:29 +0530 +Subject: [PATCH 132/141] glusterd: remove duplicate occurrence of + features.selinux from volume option table + +Label : DOWNSTREAM ONLY + +Change-Id: I0a49fece7a1fcbb9f3bbfe5806ec470aeb33ad70 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169664 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 10aa2ae..e52de20 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3242,16 +3242,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { + "pages." + "The max value is 262144 pages i.e 1 GB and " + "the min value is 1000 pages i.e ~4 MB."}, +- {.key = VKEY_FEATURES_SELINUX, +- .voltype = "features/selinux", +- .type = NO_DOC, +- .value = "on", +- .op_version = GD_OP_VERSION_3_11_0, +- .description = "Convert security.selinux xattrs to " +- "trusted.gluster.selinux on the bricks. 
Recommended " +- "to have enabled when clients and/or bricks support " +- "SELinux."}, +- + #endif /* USE_GFDB */ + { + .key = "locks.trace", +-- +1.8.3.1 + diff --git a/SOURCES/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch b/SOURCES/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch new file mode 100644 index 0000000..88f4bd0 --- /dev/null +++ b/SOURCES/0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch @@ -0,0 +1,62 @@ +From f1f27e5839dd99389bef65f79ea491e98e6935d2 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Tue, 23 Apr 2019 18:05:36 +0530 +Subject: [PATCH 133/141] glusterd: enable fips-mode-rchecksum for new volumes + +...during volume create if the cluster op-version is >=GD_OP_VERSION_7_0. + +This option itself was introduced in GD_OP_VERSION_4_0_0 via commit 6daa65356. +We missed enabling it by default for new volume creates in that commit. +If we are to do it now safely, we need to use op version +GD_OP_VERSION_7_0 and target it for release-7. + +Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/22609/ +BUG: 1706683 +Change-Id: I7c6d4a8abe0816367e7069cb5cad01744f04858f +fixes: bz#1706683 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169443 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index da877aa..77aa705 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -1614,10 +1614,17 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + gf_boolean_t pgfid_feat = _gf_false; + char *value = NULL; + xlator_t *xl = NULL; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; + + if (!graph || !volinfo || !set_dict || !brickinfo) + goto out; + ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO("glusterd", priv, out); ++ + ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_QUOTA, &value); + if (value) { + ret = gf_string2boolean(value, "a_enabled); +@@ -1661,6 +1668,12 @@ brick_graph_add_posix(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + } + } + ++ if (priv->op_version >= GD_OP_VERSION_7_0) { ++ ret = xlator_set_fixed_option(xl, "fips-mode-rchecksum", "on"); ++ if (ret) { ++ goto out; ++ } ++ } + snprintf(tmpstr, sizeof(tmpstr), "%d", brickinfo->fs_share_count); + ret = xlator_set_fixed_option(xl, "shared-brick-count", tmpstr); + out: +-- +1.8.3.1 + diff --git a/SOURCES/0134-performance-write-behind-remove-request-from-wip-lis.patch b/SOURCES/0134-performance-write-behind-remove-request-from-wip-lis.patch new file mode 100644 index 0000000..d20ca09 --- /dev/null +++ b/SOURCES/0134-performance-write-behind-remove-request-from-wip-lis.patch @@ -0,0 +1,79 @@ +From 76127f4f8f3c2bf415f66a335e7b37670cb9bd84 Mon Sep 17 00:00:00 2001 +From: Raghavendra G <rgowdapp@redhat.com> +Date: Fri, 3 May 2019 10:14:48 +0530 +Subject: [PATCH 134/141] performance/write-behind: remove request from wip + list in wb_writev_cbk + +There is a race in the way O_DIRECT writes are handled. Assume two +overlapping write requests w1 and w2. + +* w1 is issued and is in wb_inode->wip queue as the response is still + pending from bricks. 
Also wb_request_unref in wb_do_winds is not yet
+  invoked.
+
+      list_for_each_entry_safe (req, tmp, tasks, winds) {
+              list_del_init (&req->winds);
+
+              if (req->op_ret == -1) {
+                      call_unwind_error_keep_stub (req->stub, req->op_ret,
+                                                   req->op_errno);
+              } else {
+                      call_resume_keep_stub (req->stub);
+              }
+
+              wb_request_unref (req);
+      }
+
+* w2 is issued and wb_process_queue is invoked. w2 is not picked up
+  for winding as w1 is still in wb_inode->wip. w1 is added to todo
+  list and wb_writev for w2 returns.
+
+* response to w1 is received and invokes wb_request_unref. Assume
+  wb_request_unref in wb_do_winds (see point 1) is not invoked
+  yet. Since there is one more refcount, wb_request_unref in
+  wb_writev_cbk of w1 doesn't remove w1 from wip.
+
+* wb_process_queue is invoked as part of wb_writev_cbk of w1. But, it
+  fails to wind w2 as w1 is still in wip.
+
+* wb_request_unref is invoked on w1 as part of wb_do_winds. w1 is
+  removed from all queues, including wip.
+
+* After this point there is no invocation of wb_process_queue unless
+  a new request is issued from the application, causing w2 to hang
+  till the next request.
+
+This bug is similar to bz 1626780 and bz 1379655.
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/22654/
+BUG: 1702686
+Change-Id: Iaa47437613591699d4c8ad18bc0b32de6affcc31
+fixes: bz#1702686
+Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/169552
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/write-behind/src/write-behind.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index cf302bd..70e281a 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -1813,6 +1813,12 @@ wb_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+     frame->local = NULL;
+     wb_inode = req->wb_inode;
+
++    LOCK(&req->wb_inode->lock);
++    {
++        list_del_init(&req->wip);
++    }
++    UNLOCK(&req->wb_inode->lock);
++
+     wb_request_unref(req);
+
+     /* requests could be pending while this was in progress */
+--
+1.8.3.1
+
diff --git a/SOURCES/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch b/SOURCES/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
new file mode 100644
index 0000000..e6d7889
--- /dev/null
+++ b/SOURCES/0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch
@@ -0,0 +1,56 @@
+From 677f575d2289285d2e553ddd610944856cb947db Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 10 May 2019 11:21:03 +0530
+Subject: [PATCH 135/141] geo-rep: fix incorrectly formatted authorized_keys
+
+There are two ways for creating secret pem pub file during geo-rep
+setup.
+1. gluster-georep-sshkey generate
+2. gluster system:: execute gsec_create
+
+Below patch solves this problem for `gluster-georep-sshkey generate`
+method.
+Patch link: https://review.gluster.org/#/c/glusterfs/+/22246/
+
+This patch is added to support old way of creating secret pem pub file
+`gluster system:: execute gsec_create`.
+
+Problem: While Geo-rep setup when creating an ssh authorized_keys
+ the geo-rep setup inserts an extra space before the "ssh-rsa" label.
+ This gets flagged by an enterprise customer's security scan as a
+ security violation.
+Solution: Remove extra space while creating secret key.
+ +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22673/ + +>fixes: bz#1679401 +>Change-Id: I92ba7e25aaa5123dae9ebe2f3c68d14315aa5f0e +>Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1671862 +Change-Id: I11e90c00a14a301a5d95e14b5e8984867e6ff893 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169870 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/src/peer_gsec_create.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in +index 05c1638..6d4a484 100755 +--- a/geo-replication/src/peer_gsec_create.in ++++ b/geo-replication/src/peer_gsec_create.in +@@ -18,7 +18,7 @@ if [ "Xcontainer" = "X$1" ]; then + output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` + output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` + else +- output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` +- output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" " "``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` ++ output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub` ++ output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub` + fi + echo -e "$output1\n$output2" +-- +1.8.3.1 + diff --git a/SOURCES/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch b/SOURCES/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch new file mode 100644 index 0000000..403dcb3 --- /dev/null +++ b/SOURCES/0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch @@ -0,0 +1,51 @@ +From c63346dab3e5da0605bf4ddaa314253f42892c9d Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Wed, 8 May 2019 12:13:59 +0530 +Subject: [PATCH 136/141] glusterd: fix inconsistent global option output in + volume get + +volume get all all | grep <key> & volume get <volname> all | grep <key> +dumps two different output value for cluster.brick-multiplex and +cluster.server-quorum-ratio + +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/22680/ + +>Fixes: bz#1707700 +>Change-Id: Id131734e0502aa514b84768cf67fce3c22364eae +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1706776 +Change-Id: Id131734e0502aa514b84768cf67fce3c22364eae +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169948 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index e52de20..4b32fb6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2906,7 +2906,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = 1}, + {.key = GLUSTERD_QUORUM_RATIO_KEY, + .voltype = "mgmt/glusterd", +- .value = "0", ++ .value = "51", + .op_version = 1}, + /* changelog translator - global tunables */ + {.key = "changelog.changelog", +@@ -3547,7 +3547,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + /* Brick 
multiplexing options */ + {.key = GLUSTERD_BRICK_MULTIPLEX_KEY, + .voltype = "mgmt/glusterd", +- .value = "off", ++ .value = "disable", + .op_version = GD_OP_VERSION_3_10_0, + .validate_fn = validate_boolean, + .type = GLOBAL_DOC, +-- +1.8.3.1 + diff --git a/SOURCES/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch b/SOURCES/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch new file mode 100644 index 0000000..24dd1db --- /dev/null +++ b/SOURCES/0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch @@ -0,0 +1,160 @@ +From 646292b4f73bf1b506d034b85787f794963d7196 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Mon, 6 May 2019 23:35:08 +0530 +Subject: [PATCH 137/141] shd/glusterd: Serialize shd manager to prevent race + condition + +At the time of a glusterd restart, while doing a handshake +there is a possibility that multiple shd manager might get +executed. Because of this, there is a chance that multiple +shd get spawned during a glusterd restart + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22667/ + +>Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920 +>fixes: bz#1707081 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +BUG: 1704851 +Change-Id: Ie20798441e07d7d7a93b7d38dfb924cea178a920 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169947 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../serialize-shd-manager-glusterd-restart.t | 54 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++++ + xlators/mgmt/glusterd/src/glusterd.c | 1 + + xlators/mgmt/glusterd/src/glusterd.h | 3 ++ + 4 files changed, 72 insertions(+) + create mode 100644 tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t + +diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t +new file mode 100644 +index 0000000..3a27c2a +--- /dev/null ++++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t +@@ -0,0 +1,54 @@ ++#! /bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../cluster.rc ++ ++function check_peers { ++count=`$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l` ++echo $count ++} ++ ++function check_shd { ++ps aux | grep $1 | grep glustershd | wc -l ++} ++ ++cleanup ++ ++ ++TEST launch_cluster 6 ++ ++TESTS_EXPECTED_IN_LOOP=25 ++for i in $(seq 2 6); do ++ hostname="H$i" ++ TEST $CLI_1 peer probe ${!hostname} ++done ++ ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers; ++for i in $(seq 1 5); do ++ ++ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i ++ TEST $CLI_1 volume start ${V0}_$i force ++ ++done ++ ++#kill a node ++TEST kill_node 3 ++ ++TEST $glusterd_3; ++EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3 ++ ++for i in $(seq 1 5); do ++ ++ TEST $CLI_1 volume stop ${V0}_$i ++ TEST $CLI_1 volume delete ${V0}_$i ++ ++done ++ ++for i in $(seq 1 6); do ++ hostname="H$i" ++ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname} ++done ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index a9eab42..75f9a07 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -254,14 +254,26 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + { + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; ++ glusterd_conf_t *conf = NULL; ++ gf_boolean_t shd_restart = _gf_false; + ++ conf = THIS->private; + volinfo = data; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + if (volinfo) + glusterd_volinfo_ref(volinfo); + ++ while (conf->restart_shd) { ++ synclock_unlock(&conf->big_lock); ++ sleep(2); ++ synclock_lock(&conf->big_lock); ++ } ++ conf->restart_shd = _gf_true; ++ shd_restart = _gf_true; ++ + ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret) + goto out; +@@ -310,6 +322,8 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + } + } + out: ++ if (shd_restart) ++ conf->restart_shd = _gf_false; + if (volinfo) + glusterd_volinfo_unref(volinfo); + if (ret) +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index c0973cb..6d7dd4a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1819,6 +1819,7 @@ init(xlator_t *this) + conf->rpc = rpc; + conf->uds_rpc = uds_rpc; + conf->gfs_mgmt = &gd_brick_prog; ++ conf->restart_shd = _gf_false; + this->private = conf; + /* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's + * snprintf checking will throw an error here if sprintf is used. 
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index bd9f509..2ea8560 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -222,6 +222,9 @@ typedef struct { + gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; ++ gf_boolean_t restart_shd; /* This flag prevents running two shd manager ++ simultaneously ++ */ + pthread_mutex_t attach_lock; /* Lock can be per process or a common one */ + pthread_mutex_t volume_lock; /* We release the big_lock from lot of places + which might lead the modification of volinfo +-- +1.8.3.1 + diff --git a/SOURCES/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch b/SOURCES/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch new file mode 100644 index 0000000..9b8bb86 --- /dev/null +++ b/SOURCES/0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch @@ -0,0 +1,64 @@ +From d08083d057d6cc7136128cad6ecefba43b886c4c Mon Sep 17 00:00:00 2001 +From: Vishal Pandey <vpandey@redhat.com> +Date: Thu, 9 May 2019 14:37:22 +0530 +Subject: [PATCH 138/141] glusterd: Add gluster volume stop operation to + glusterd_validate_quorum() + +ISSUE: gluster volume stop succeeds even if quorum is not met. + +Fix: Add GD_OP_STOP_VOLUME to gluster_validate_quorum in +glusterd_mgmt_v3_pre_validate (). + +Since the volume stop command has been ported from synctask to mgmt_v3, +the quorum check was missed out. + +>upstream patch : https://review.gluster.org/#/c/glusterfs/+/22692/ + +>Change-Id: I7a634ad89ec2e286ea262d7952061efad5360042 +>fixes: bz#1690753 +>Signed-off-by: Vishal Pandey <vpandey@redhat.com> + +BUG: 1706893 +Change-Id: I7a634ad89ec2e286ea262d7952061efad5360042 +Signed-off-by: Vishal Pandey <vpandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169949 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/quorum-validation.t | 4 +++- + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 2 +- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t +index 05aef4e..ff46729 100644 +--- a/tests/bugs/glusterd/quorum-validation.t ++++ b/tests/bugs/glusterd/quorum-validation.t +@@ -34,9 +34,11 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2 + TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start + TEST ! $CLI_1 volume set $V0 barrier enable + +-# Now execute a command which goes through op state machine and it should fail + TEST ! $CLI_1 volume profile $V0 start + ++#bug-1690753 - Volume stop when quorum not met is successful ++TEST ! 
$CLI_1 volume stop $V0 ++ + #Bring back the 2nd glusterd + TEST $glusterd_2 + +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index 61ad66e..ec78913 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -1059,7 +1059,7 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + goto out; + } + +- if (op == GD_OP_PROFILE_VOLUME) { ++ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) { + ret = glusterd_validate_quorum(this, op, req_dict, op_errstr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET, +-- +1.8.3.1 + diff --git a/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch b/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch new file mode 100644 index 0000000..4f8ec9c --- /dev/null +++ b/SOURCES/0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch @@ -0,0 +1,300 @@ +From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Mon, 29 Apr 2019 13:22:32 +0530 +Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec + fini + +We were not properly cleaning self-heal daemon resources +during ec fini. With shd multiplexing, it is absolutely +necessary to cleanup all the resources during ec fini. + +Back port of + upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/ + >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2 + >fixes: bz#1703948 + >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +BUG: 1703434 +Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169994 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/syncop-utils.c | 2 + + xlators/cluster/afr/src/afr-self-heald.c | 5 +++ + xlators/cluster/ec/src/ec-heald.c | 77 +++++++++++++++++++++++++++----- + xlators/cluster/ec/src/ec-heald.h | 3 ++ + xlators/cluster/ec/src/ec-messages.h | 3 +- + xlators/cluster/ec/src/ec.c | 47 +++++++++++++++++++ + 6 files changed, 124 insertions(+), 13 deletions(-) + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index b842142..4167db4 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + + if (frame) { + this = frame->this; ++ } else { ++ this = THIS; + } + + /*For this functionality to be implemented in general, we need +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 8bc4720..522fe5d 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + afr_private_t *priv = NULL; + + priv = this->private; ++ ++ if (this->cleanup_starting) { ++ return -ENOTCONN; ++ } ++ + if (!priv->shd.enabled) + return -EBUSY; + +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index cba111a..edf5e11 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -71,6 +71,11 @@ disabled_loop: + break; + } + ++ if (ec->shutdown) { ++ healer->running = _gf_false; ++ return -1; ++ } ++ + ret = 
healer->rerun; + healer->rerun = 0; + +@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer) + goto out; + } + ++ _mask_cancellation(); + ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, + healer, ec_shd_index_heal, xdata, + ec->shd.max_threads, ec->shd.wait_qlength); ++ _unmask_cancellation(); + out: + if (xdata) + dict_unref(xdata); +@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + int ret = 0; + + ec = this->private; ++ ++ if (this->cleanup_starting) { ++ return -ENOTCONN; ++ } ++ + if (ec->xl_up_count <= ec->fragments) { + return -ENOTCONN; + } +@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode) + { + ec_t *ec = NULL; + loc_t loc = {0}; ++ int ret = -1; + + ec = healer->this->private; + loc.inode = inode; +- return syncop_ftw(ec->xl_list[healer->subvol], &loc, +- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); ++ _mask_cancellation(); ++ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc, ++ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); ++ _unmask_cancellation(); ++ return ret; + } + + void * +@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data) + { + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; ++ int run = 0; + + healer = data; + THIS = this = healer->this; + ec_t *ec = this->private; + + for (;;) { +- ec_shd_healer_wait(healer); ++ run = ec_shd_healer_wait(healer); ++ if (run == -1) ++ break; + + if (ec->xl_up_count > ec->fragments) { + gf_msg_debug(this->name, 0, "starting index sweep on subvol %s", +@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data) + + rootloc.inode = this->itable->root; + for (;;) { +- pthread_mutex_lock(&healer->mutex); +- { +- run = __ec_shd_healer_wait(healer); +- if (!run) +- healer->running = _gf_false; +- } +- pthread_mutex_unlock(&healer->mutex); +- +- if (!run) ++ run = ec_shd_healer_wait(healer); ++ if (run < 0) { + break; ++ } else if (run == 0) { ++ continue; ++ } + + if (ec->xl_up_count > ec->fragments) { + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START, +@@ -562,3 +577,41 @@ out: + dict_del(output, this->name); + return ret; + } ++ ++void ++ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer) ++{ ++ if (!healer) ++ return; ++ ++ pthread_cond_destroy(&healer->cond); ++ pthread_mutex_destroy(&healer->mutex); ++} ++ ++void ++ec_selfheal_daemon_fini(xlator_t *this) ++{ ++ struct subvol_healer *healer = NULL; ++ ec_self_heald_t *shd = NULL; ++ ec_t *priv = NULL; ++ int i = 0; ++ ++ priv = this->private; ++ if (!priv) ++ return; ++ ++ shd = &priv->shd; ++ if (!shd->iamshd) ++ return; ++ ++ for (i = 0; i < priv->nodes; i++) { ++ healer = &shd->index_healers[i]; ++ ec_destroy_healer_object(this, healer); ++ ++ healer = &shd->full_healers[i]; ++ ec_destroy_healer_object(this, healer); ++ } ++ ++ GF_FREE(shd->index_healers); ++ GF_FREE(shd->full_healers); ++} +diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h +index 2eda2a7..8184cf4 100644 +--- a/xlators/cluster/ec/src/ec-heald.h ++++ b/xlators/cluster/ec/src/ec-heald.h +@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this); + void + ec_shd_index_healer_wake(ec_t *ec); + ++void ++ec_selfheal_daemon_fini(xlator_t *this); ++ + #endif /* __EC_HEALD_H__ */ +diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h +index 7c28808..ce299bb 100644 +--- a/xlators/cluster/ec/src/ec-messages.h ++++ b/xlators/cluster/ec/src/ec-messages.h +@@ -55,6 +55,7 @@ GLFS_MSGID(EC, 
EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, + EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE, + EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, + EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, +- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED); ++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, ++ EC_MSG_THREAD_CLEANUP_FAILED); + + #endif /* !_EC_MESSAGES_H_ */ +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 3c8013e..264582a 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec) + } + + void ++ec_cleanup_healer_object(ec_t *ec) ++{ ++ struct subvol_healer *healer = NULL; ++ ec_self_heald_t *shd = NULL; ++ void *res = NULL; ++ int i = 0; ++ gf_boolean_t is_join = _gf_false; ++ ++ shd = &ec->shd; ++ if (!shd->iamshd) ++ return; ++ ++ for (i = 0; i < ec->nodes; i++) { ++ healer = &shd->index_healers[i]; ++ pthread_mutex_lock(&healer->mutex); ++ { ++ healer->rerun = 1; ++ if (healer->running) { ++ pthread_cond_signal(&healer->cond); ++ is_join = _gf_true; ++ } ++ } ++ pthread_mutex_unlock(&healer->mutex); ++ if (is_join) { ++ pthread_join(healer->thread, &res); ++ is_join = _gf_false; ++ } ++ ++ healer = &shd->full_healers[i]; ++ pthread_mutex_lock(&healer->mutex); ++ { ++ healer->rerun = 1; ++ if (healer->running) { ++ pthread_cond_signal(&healer->cond); ++ is_join = _gf_true; ++ } ++ } ++ pthread_mutex_unlock(&healer->mutex); ++ if (is_join) { ++ pthread_join(healer->thread, &res); ++ is_join = _gf_false; ++ } ++ } ++} ++void + ec_pending_fops_completed(ec_t *ec) + { + if (ec->shutdown) { +@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + /* If there aren't pending fops running after we have waken up + * them, we immediately propagate the notification. */ + propagate = ec_disable_delays(ec); ++ ec_cleanup_healer_object(ec); + goto unlock; + } + +@@ -759,6 +805,7 @@ failed: + void + fini(xlator_t *this) + { ++ ec_selfheal_daemon_fini(this); + __ec_destroy_private(this); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch b/SOURCES/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch new file mode 100644 index 0000000..b4b44e8 --- /dev/null +++ b/SOURCES/0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch @@ -0,0 +1,40 @@ +From 40bd6e9c186adb427e136a84eaab631e6a6f5263 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Sun, 5 May 2019 21:17:24 +0530 +Subject: [PATCH 140/141] cluster/ec: Reopen shouldn't happen with O_TRUNC + +Problem: +Doing re-open with O_TRUNC will truncate the fragment even when it is not +needed needing extra heals + +Fix: +At the time of re-open don't use O_TRUNC. 
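A minimal standalone sketch of the idea (not part of the patch itself, assuming only standard fcntl flags): the saved open flags are reused for the reopen with O_TRUNC masked out, so a reopen can never shrink a fragment that was already written.

    #include <fcntl.h>

    /* Flags to use when reissuing an open on an existing fragment. */
    static int reopen_flags(int saved_flags)
    {
        /* O_TRUNC would zero the fragment and force an avoidable heal;
         * strip it and keep everything else (O_RDWR, O_APPEND, ...). */
        return saved_flags & ~O_TRUNC;
    }
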
+ +Upstream-patch: https://review.gluster.org/c/glusterfs/+/22660/ + +fixes bz#1706549 +Change-Id: Idc6408968efaad897b95a5a52481c66e843d3fb8 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169982 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 1454ae2..b1ba5e9 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -128,7 +128,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + } else { + ec_open(fop->frame, fop->xl, need_open, + EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc, +- fop->fd->flags, fop->fd, NULL); ++ fop->fd->flags & (~O_TRUNC), fop->fd, NULL); + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0141-socket-ssl-fix-crl-handling.patch b/SOURCES/0141-socket-ssl-fix-crl-handling.patch new file mode 100644 index 0000000..4c51ad0 --- /dev/null +++ b/SOURCES/0141-socket-ssl-fix-crl-handling.patch @@ -0,0 +1,295 @@ +From e3020e43344ddbc32e62e06bbbf88a4f5d7cdc82 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Fri, 10 May 2019 11:13:45 +0530 +Subject: [PATCH 141/141] socket/ssl: fix crl handling + +Problem: +Just setting the path to the CRL directory in socket_init() wasn't working. + +Solution: +Need to use special API to retrieve and set X509_VERIFY_PARAM and set +the CRL checking flags explicitly. +Also, setting the CRL checking flags is a big pain, since the connection +is declared as failed if any CRL isn't found in the designated file or +directory. A comment has been added to the code appropriately. 
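Condensed, this is what the patch's ssl_set_crl_verify_flags() below boils down to on OpenSSL 1.0.2 and later, where SSL_CTX_get0_param() is available (older releases fall back to X509_STORE_set_flags() on the context's cert store):

    #include <openssl/ssl.h>
    #include <openssl/x509_vfy.h>

    /* Turn on strict CRL checking for a server-side SSL_CTX. */
    static void enable_crl_checks(SSL_CTX *ctx)
    {
        X509_VERIFY_PARAM *vpm = SSL_CTX_get0_param(ctx);
        if (vpm) {
            unsigned long flags = X509_VERIFY_PARAM_get_flags(vpm);
            /* Once set, verification fails whenever a suitable CRL
             * cannot be found, which is why clients must keep these
             * flags cleared. */
            X509_VERIFY_PARAM_set_flags(vpm, flags | X509_V_FLAG_CRL_CHECK |
                                                  X509_V_FLAG_CRL_CHECK_ALL);
        }
    }
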
+ +> Change-Id: I8a8ed2ddaf4b5eb974387d2f7b1a85c1ca39fe79 +> fixes: bz#1687326 +> Signed-off-by: Milind Changire <mchangir@redhat.com> +> (Cherry pick from commit 06fa261207f0f0625c52fa977b96e5875e9a91e0) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22334/) + +Change-Id: I0958e9890035fd376f1e1eafc1452caf3edd184b +BUG: 1583585 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/166458 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + configure.ac | 2 + + rpc/rpc-transport/socket/src/socket.c | 110 ++++++++++++++++++++++++++++------ + rpc/rpc-transport/socket/src/socket.h | 2 + + tests/features/ssl-ciphers.t | 13 +++- + 4 files changed, 107 insertions(+), 20 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 3065077..0e11d4c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -491,6 +491,8 @@ AC_CHECK_HEADERS([openssl/dh.h]) + + AC_CHECK_HEADERS([openssl/ecdh.h]) + ++AC_CHECK_LIB([ssl], [SSL_CTX_get0_param], [AC_DEFINE([HAVE_SSL_CTX_GET0_PARAM], [1], [define if found OpenSSL SSL_CTX_get0_param])]) ++ + dnl Math library + AC_CHECK_LIB([m], [pow], [MATH_LIB='-lm'], [MATH_LIB='']) + AC_SUBST(MATH_LIB) +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index f6de1d3..bf2fa71 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -308,8 +308,65 @@ out: + #define ssl_write_one(t, b, l) \ + ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_write) + ++/* set crl verify flags only for server */ ++/* see man X509_VERIFY_PARAM_SET_FLAGS(3) ++ * X509_V_FLAG_CRL_CHECK enables CRL checking for the certificate chain ++ * leaf certificate. An error occurs if a suitable CRL cannot be found. ++ * Since we're never going to revoke a gluster node cert, we better disable ++ * CRL check for server certs to avoid getting error and failed connection ++ * attempts. 
++ */ ++static void ++ssl_clear_crl_verify_flags(SSL_CTX *ssl_ctx) ++{ ++#ifdef X509_V_FLAG_CRL_CHECK_ALL ++#ifdef HAVE_SSL_CTX_GET0_PARAM ++ X509_VERIFY_PARAM *vpm; ++ ++ vpm = SSL_CTX_get0_param(ssl_ctx); ++ if (vpm) { ++ X509_VERIFY_PARAM_clear_flags( ++ vpm, (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL)); ++ } ++#else ++ /* CRL verify flag need not be cleared for rhel6 kind of clients */ ++#endif ++#else ++ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL"); ++#endif ++ return; ++} ++ ++/* set crl verify flags only for server */ ++static void ++ssl_set_crl_verify_flags(SSL_CTX *ssl_ctx) ++{ ++#ifdef X509_V_FLAG_CRL_CHECK_ALL ++#ifdef HAVE_SSL_CTX_GET0_PARAM ++ X509_VERIFY_PARAM *vpm; ++ ++ vpm = SSL_CTX_get0_param(ssl_ctx); ++ if (vpm) { ++ unsigned long flags; ++ ++ flags = X509_VERIFY_PARAM_get_flags(vpm); ++ flags |= (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); ++ X509_VERIFY_PARAM_set_flags(vpm, flags); ++ } ++#else ++ X509_STORE *x509store; ++ ++ x509store = SSL_CTX_get_cert_store(ssl_ctx); ++ X509_STORE_set_flags(x509store, ++ X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); ++#endif ++#else ++ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL"); ++#endif ++} ++ + int +-ssl_setup_connection_prefix(rpc_transport_t *this) ++ssl_setup_connection_prefix(rpc_transport_t *this, gf_boolean_t server) + { + int ret = -1; + socket_private_t *priv = NULL; +@@ -332,6 +389,9 @@ ssl_setup_connection_prefix(rpc_transport_t *this) + priv->ssl_accepted = _gf_false; + priv->ssl_context_created = _gf_false; + ++ if (!server && priv->crl_path) ++ ssl_clear_crl_verify_flags(priv->ssl_ctx); ++ + priv->ssl_ssl = SSL_new(priv->ssl_ctx); + if (!priv->ssl_ssl) { + gf_log(this->name, GF_LOG_ERROR, "SSL_new failed"); +@@ -2664,7 +2724,7 @@ ssl_handle_server_connection_attempt(rpc_transport_t *this) + fd = priv->sock; + + if (!priv->ssl_context_created) { +- ret = ssl_setup_connection_prefix(this); ++ ret = ssl_setup_connection_prefix(this, _gf_true); + if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "> ssl_setup_connection_prefix() failed!"); +@@ -2718,7 +2778,7 @@ ssl_handle_client_connection_attempt(rpc_transport_t *this) + ret = -1; + } else { + if (!priv->ssl_context_created) { +- ret = ssl_setup_connection_prefix(this); ++ ret = ssl_setup_connection_prefix(this, _gf_false); + if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "> ssl_setup_connection_prefix() " +@@ -3085,7 +3145,30 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + gf_log(this->name, GF_LOG_TRACE, "XXX server:%s, client:%s", + new_trans->myinfo.identifier, new_trans->peerinfo.identifier); + ++ /* Make options available to local socket_init() to create new ++ * SSL_CTX per transport. A separate SSL_CTX per transport is ++ * required to avoid setting crl checking options for client ++ * connections. The verification options eventually get copied ++ * to the SSL object. Unfortunately, there's no way to identify ++ * whether socket_init() is being called after a client-side ++ * connect() or a server-side accept(). Although, we could pass ++ * a flag from the transport init() to the socket_init() and ++ * from this place, this doesn't identify the case where the ++ * server-side transport loading is done for the first time. ++ * Also, SSL doesn't apply for UNIX sockets. 
++ */ ++ if (new_sockaddr.ss_family != AF_UNIX) ++ new_trans->options = dict_ref(this->options); ++ new_trans->ctx = this->ctx; ++ + ret = socket_init(new_trans); ++ ++ /* reset options to NULL to avoid double free */ ++ if (new_sockaddr.ss_family != AF_UNIX) { ++ dict_unref(new_trans->options); ++ new_trans->options = NULL; ++ } ++ + if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "initialization of new_trans " +@@ -4150,7 +4233,6 @@ ssl_setup_connection_params(rpc_transport_t *this) + char *cipher_list = DEFAULT_CIPHER_LIST; + char *dh_param = DEFAULT_DH_PARAM; + char *ec_curve = DEFAULT_EC_CURVE; +- char *crl_path = NULL; + + priv = this->private; + +@@ -4192,6 +4274,7 @@ ssl_setup_connection_params(rpc_transport_t *this) + } + priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list); + ++ optstr = NULL; + if (dict_get_str(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { + gf_log(this->name, GF_LOG_WARNING, +@@ -4199,9 +4282,9 @@ ssl_setup_connection_params(rpc_transport_t *this) + SSL_ENABLED_OPT); + } + if (strcasecmp(optstr, "NULL") == 0) +- crl_path = NULL; ++ priv->crl_path = NULL; + else +- crl_path = optstr; ++ priv->crl_path = gf_strdup(optstr); + } + + gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG, +@@ -4343,24 +4426,15 @@ ssl_setup_connection_params(rpc_transport_t *this) + } + + if (!SSL_CTX_load_verify_locations(priv->ssl_ctx, priv->ssl_ca_list, +- crl_path)) { ++ priv->crl_path)) { + gf_log(this->name, GF_LOG_ERROR, "could not load CA list"); + goto err; + } + + SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth); + +- if (crl_path) { +-#ifdef X509_V_FLAG_CRL_CHECK_ALL +- X509_STORE *x509store; +- +- x509store = SSL_CTX_get_cert_store(priv->ssl_ctx); +- X509_STORE_set_flags( +- x509store, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL); +-#else +- gf_log(this->name, GF_LOG_ERROR, +- "OpenSSL version does not support CRL"); +-#endif ++ if (priv->crl_path) { ++ ssl_set_crl_verify_flags(priv->ssl_ctx); + } + + priv->ssl_session_id = session_id++; +diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h +index e1ccae2..e7c0090 100644 +--- a/rpc/rpc-transport/socket/src/socket.h ++++ b/rpc/rpc-transport/socket/src/socket.h +@@ -14,6 +14,7 @@ + #include <openssl/ssl.h> + #include <openssl/err.h> + #include <openssl/x509v3.h> ++#include <openssl/x509_vfy.h> + #ifdef HAVE_OPENSSL_DH_H + #include <openssl/dh.h> + #endif +@@ -246,6 +247,7 @@ typedef struct { + char *ssl_own_cert; + char *ssl_private_key; + char *ssl_ca_list; ++ char *crl_path; + int pipe[2]; + struct gf_sock_incoming incoming; + /* -1 = not connected. 0 = in progress. 1 = connected */ +diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t +index 563d37c..7e1e199 100644 +--- a/tests/features/ssl-ciphers.t ++++ b/tests/features/ssl-ciphers.t +@@ -175,8 +175,6 @@ BRICK_PORT=`brick_port $V0` + EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + # test revocation +-# no need to restart the volume since the options are used +-# by the client here. 
+ TEST $CLI volume set $V0 ssl.crl-path $TMPDIR + EXPECT $TMPDIR volume_option $V0 ssl.crl-path + $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 +@@ -189,14 +187,25 @@ TEST openssl ca -batch -config $SSL_CFG -revoke $SSL_CERT 2>&1 + TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1 + + # Failed once revoked ++# Although client fails to mount without restarting the server after crl-path ++# is set when no actual crl file is found on the client, it would also fail ++# when server is restarted for the same reason. Since the socket initialization ++# code is the same for client and server, the crl verification flags need to ++# be turned off for the client to avoid SSL searching for CRLs in the ++# ssl.crl-path. If no CRL files are found in the ssl.crl-path, SSL fails the ++# connect() attempt on the client. ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 + $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + EXPECT "N" wait_mount $M0 + TEST ! test -f $TEST_FILE + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + + # Succeed with CRL disabled ++TEST $CLI volume stop $V0 + TEST $CLI volume set $V0 ssl.crl-path NULL + EXPECT NULL volume_option $V0 ssl.crl-path ++TEST $CLI volume start $V0 + $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + EXPECT "Y" wait_mount $M0 + TEST test -f $TEST_FILE +-- +1.8.3.1 + diff --git a/SOURCES/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch b/SOURCES/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch new file mode 100644 index 0000000..ec6ed8a --- /dev/null +++ b/SOURCES/0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch @@ -0,0 +1,36 @@ +From e44b75fdb86dcf759204816c873b4f9f4efbefa8 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Tue, 21 May 2019 16:17:09 +0530 +Subject: [PATCH 142/169] lock: check null value of dict to avoid log flooding + +> updates: bz#1712322 +> Change-Id: I120a1d23506f9ebcf88c7ea2f2eff4978a61cf4a +> Signed-off-by: Susant Palai <spalai@redhat.com> +(backport of fix https://review.gluster.org/#/c/glusterfs/+/22756/) + +BUG: bz#1704181 +Change-Id: I2a192236328ebb39666ffef1146df312c08a377d +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/171325 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 3f1c7a7..adb0df5 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -121,7 +121,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + + #define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \ + do { \ +- if (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK) || \ ++ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \ + (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \ + inode_t *__inode = (loc ? 
loc->inode : fd->inode); \ + pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \ +-- +1.8.3.1 + diff --git a/SOURCES/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch b/SOURCES/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch new file mode 100644 index 0000000..fd25a69 --- /dev/null +++ b/SOURCES/0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch @@ -0,0 +1,42 @@ +From 43fb1d9d3890c44108b466d308177428fb8217aa Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 27 May 2019 10:11:39 +0530 +Subject: [PATCH 143/169] packaging : Change the dependency on nfs-ganesha to + 2.7 for glusterfs-ganesha + +Change-Id: I16a3f32eddfcbf745d67de9dc7440e2fc6ef2315 +fixes: bz#1714078 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/171471 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 86a1527..ed58356 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -460,7 +460,7 @@ Summary: NFS-Ganesha configuration + Group: Applications/File + + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: nfs-ganesha-gluster >= 2.4.1 ++Requires: nfs-ganesha-gluster >= 2.7.3 + Requires: pcs, dbus + %if ( 0%{?rhel} && 0%{?rhel} == 6 ) + Requires: cman, pacemaker, corosync +@@ -1933,6 +1933,9 @@ fi + %endif + + %changelog ++* Mon May 27 2019 Jiffin Tony Thottan <jthottan@redhat.com> ++- Change the dependency to 2.7.3 on nfs-ganesha for glusterfs-ganesha (#1714078) ++ + * Sun Apr 7 2019 Jiffin Tony Thottan <jthottan@redhat.com> + - DOWNSTREAM ONLY - revert of 83abcb(gnfs in an optional subpackage) + +-- +1.8.3.1 + diff --git a/SOURCES/0144-cluster-ec-honor-contention-notifications-for-partia.patch b/SOURCES/0144-cluster-ec-honor-contention-notifications-for-partia.patch new file mode 100644 index 0000000..40a6aa8 --- /dev/null +++ b/SOURCES/0144-cluster-ec-honor-contention-notifications-for-partia.patch @@ -0,0 +1,114 @@ +From ff8a74250209f4279f67dd89c3e57b2289a1b7d1 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Thu, 9 May 2019 11:07:18 +0200 +Subject: [PATCH 144/169] cluster/ec: honor contention notifications for + partially acquired locks + +EC was ignoring lock contention notifications received while a lock was +being acquired. When a lock is partially acquired (some bricks have +granted the lock but some others not yet) we can receive notifications +from acquired bricks, which should be honored, since we may not receive +more notifications after that. + +Since EC was ignoring them, once the lock was acquired, it was not +released until the eager-lock timeout, causing unnecessary delays on +other clients. + +This fix takes into consideration the notifications received before +having completed the full lock acquisition. After that, the lock will +be releaed as soon as possible. 
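In essence the fix relaxes one condition in ec_lock_release() (a sketch with hypothetical simplified types; the real one-line change is in the hunk below): a lock is skipped only when it is already marked for release, so a partially acquired lock remains eligible and contention seen mid-acquisition is honored.

    struct ec_lock_s { int acquired; int release; };

    static int skip_contention(struct ec_lock_s *lock)
    {
        /* before: (lock == NULL) || !lock->acquired || lock->release */
        return (lock == NULL) || lock->release;
    }
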
+ +Upstream patch: +> BUG: 1708156 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22690 +> Change-Id: I2a306dbdb29fb557dcab7788a258bd75d826cc12 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Fixes: bz#1703455 +Change-Id: I2a306dbdb29fb557dcab7788a258bd75d826cc12 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/171525 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + ...or-inodelk-contention-notify-on-partial-locks.t | 54 ++++++++++++++++++++++ + xlators/cluster/ec/src/ec-common.c | 2 +- + 2 files changed, 55 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t + +diff --git a/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t +new file mode 100644 +index 0000000..67fdb18 +--- /dev/null ++++ b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t +@@ -0,0 +1,54 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++function do_ls() { ++ local dir="${1}" ++ local i ++ ++ for i in {1..50}; do ++ ls -l $M0/${dir} >/dev/null & ++ ls -l $M1/${dir} >/dev/null & ++ ls -l $M2/${dir} >/dev/null & ++ ls -l $M3/${dir} >/dev/null & ++ done ++ wait ++} ++ ++function measure_time() { ++ { ++ LC_ALL=C ++ time -p "${@}" ++ } 2>&1 | awk '/^real/ { print $2 * 1000 }' ++} ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++ ++TEST $CLI volume set $V0 disperse.eager-lock on ++TEST $CLI volume set $V0 disperse.other-eager-lock on ++TEST $CLI volume set $V0 features.locks-notify-contention on ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 10 ++TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 10 ++ ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M2 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M3 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M3 ++TEST mkdir $M0/dir ++TEST touch $M0/dir/file.{1..10} ++ ++# Run multiple 'ls' concurrently from multiple clients so that they collide and ++# cause partial locks. 
++TEST [[ $(measure_time do_ls dir) -lt 10000 ]] ++ ++cleanup +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index b1ba5e9..e85aa8b 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2497,7 +2497,7 @@ ec_lock_release(ec_t *ec, inode_t *inode) + goto done; + } + lock = ctx->inode_lock; +- if ((lock == NULL) || !lock->acquired || lock->release) { ++ if ((lock == NULL) || lock->release) { + goto done; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch b/SOURCES/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch new file mode 100644 index 0000000..398f460 --- /dev/null +++ b/SOURCES/0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch @@ -0,0 +1,65 @@ +From 55d47524c0c8a88204129c3a94d71779aae00beb Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Tue, 28 May 2019 08:18:12 +0530 +Subject: [PATCH 145/169] core: Capture process memory usage at the time of + call gf_msg_nomem + +Problem: All gluster processes call gf_mgm_nomem while calloc/malloc/realloc + throw an error but the message does not capture current memory usage of + gluster process + +Solution: Call getrusage to capture current memory usage of gluster + process + +> Change-Id: I2e0319da1f33b177fa042fdc9e7268068576c9c3 +> fixes: bz#1708051 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22688/ +> Cherry pick from commit 8e1d53f14730ac1b1ca0ce9d9a0ccb32578fd4fb + +BUG: 1709087 +Change-Id: I2e0319da1f33b177fa042fdc9e7268068576c9c3 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/171587 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/logging.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 5d46916..7f0eff6 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -17,6 +17,7 @@ + #include <string.h> + #include <stdlib.h> + #include <syslog.h> ++#include <sys/resource.h> + + #ifdef HAVE_BACKTRACE + #include <execinfo.h> +@@ -1196,6 +1197,7 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, + glusterfs_ctx_t *ctx = NULL; + int wlen = 0; + int priority; ++ struct rusage r_usage; + + this = THIS; + ctx = this->ctx; +@@ -1231,10 +1233,11 @@ _gf_msg_nomem(const char *domain, const char *file, const char *function, + "]" + " [%s:%d:%s] %s: no memory " + "available for size (%" GF_PRI_SIZET +- ")" ++ ") current memory usage in kilobytes %ld" + " [call stack follows]\n", + timestr, gf_level_strings[level], (uint64_t)0, basename, +- line, function, domain, size); ++ line, function, domain, size, ++ (!getrusage(RUSAGE_SELF, &r_usage) ? 
r_usage.ru_maxrss : 0)); + if (-1 == ret) { + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch b/SOURCES/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch new file mode 100644 index 0000000..50747cc --- /dev/null +++ b/SOURCES/0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch @@ -0,0 +1,146 @@ +From 8cc721ee43ac8038eecb712278378710ad0745ed Mon Sep 17 00:00:00 2001 +From: root <root@localhost.localdomain> +Date: Sun, 7 Apr 2019 19:31:17 +0530 +Subject: [PATCH 146/169] dht: Custom xattrs are not healed in case of + add-brick + +Problem: If any custom xattrs are set on the directory before + add a brick, xattrs are not healed on the directory + after adding a brick. + +Solution: xattr are not healed because dht_selfheal_dir_mkdir_lookup_cbk + checks the value of MDS and if MDS value is not negative + selfheal code path does not take reference of MDS xattrs.Change the + condition to take reference of MDS xattr so that custom xattrs are + populated on newly added brick + +> Updates: bz#1702299 +> Change-Id: Id14beedb98cce6928055f294e1594b22132e811c +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit aa52259de7b50625b754ce9fb5c0f38e22d79dd6) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22520/) + +BUG: 1702298 +Change-Id: Id14beedb98cce6928055f294e1594b22132e811c +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/171591 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/bug-1702299.t | 67 ++++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-selfheal.c | 9 +---- + 2 files changed, 68 insertions(+), 8 deletions(-) + create mode 100644 tests/bugs/bug-1702299.t + +diff --git a/tests/bugs/bug-1702299.t b/tests/bugs/bug-1702299.t +new file mode 100644 +index 0000000..1cff2ed +--- /dev/null ++++ b/tests/bugs/bug-1702299.t +@@ -0,0 +1,67 @@ ++#!/bin/bash ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../dht.rc ++cleanup; ++ ++function get_getfattr { ++ local path=$1 ++ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' ++} ++ ++function set_fattr { ++ for i in `seq 1 10` ++ do ++ setfattr -n user.foo -v "newabc" ./tmp${i} ++ if [ "$?" 
= "0" ] ++ then ++ succ=$((succ+1)) ++ else ++ fail=$((fail+1)) ++ fi ++ done ++} ++ ++ ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0; ++ ++cd $M0 ++TEST mkdir tmp{1..10} ++ ++succ=fail=0 ++## set user.foo xattr with value newabc after kill one brick ++set_fattr ++count=10 ++EXPECT "$succ" echo $count ++count=0 ++EXPECT "$fail" echo $count ++ ++cd - ++ ++# Add-brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5} ++ ++cd $M0 ++## At this point dht code will heal xattr on down brick only for those dirs ++## hashed subvol was up at the time of update xattr ++TEST stat ./tmp{1..10} ++ ++ ++## Count the user.foo xattr value with newabc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}4/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++## Count the user.foo xattr value with newabc on brick and compare with succ value ++count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l` ++EXPECT "$succ" echo $count ++ ++ ++cd - ++TEST umount $M0 ++cleanup +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index 5420fca..f5dfff9 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -1310,12 +1310,8 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + int this_call_cnt = 0; + int missing_dirs = 0; + dht_layout_t *layout = NULL; +- dht_conf_t *conf = 0; + xlator_t *prev = 0; + loc_t *loc = NULL; +- int check_mds = 0; +- int errst = 0; +- int32_t mds_xattr_val[1] = {0}; + char gfid_local[GF_UUID_BUF_SIZE] = {0}; + int index = -1; + +@@ -1324,7 +1320,6 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + local = frame->local; + layout = local->layout; + loc = &local->loc; +- conf = this->private; + prev = cookie; + + if (!gf_uuid_is_null(local->gfid)) +@@ -1347,9 +1342,7 @@ dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie, + + if (!op_ret) { + dht_iatt_merge(this, &local->stbuf, stbuf); +- check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, +- mds_xattr_val, 1, &errst); +- if (dict_get(xattr, conf->mds_xattr_key) && check_mds && !errst) { ++ if (prev == local->mds_subvol) { + dict_unref(local->xattr); + local->xattr = dict_ref(xattr); + } +-- +1.8.3.1 + diff --git a/SOURCES/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch b/SOURCES/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch new file mode 100644 index 0000000..27f8a4e --- /dev/null +++ b/SOURCES/0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch @@ -0,0 +1,80 @@ +From d7795a592883cfb01da76b6905a7c9eb1e912bef Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Tue, 28 May 2019 08:28:29 +0530 +Subject: [PATCH 147/169] glusterd: bulkvoldict thread is not handling all + volumes + +Problem: In commit ac70f66c5805e10b3a1072bd467918730c0aeeb4 I + missed one condition to populate volume dictionary in + multiple threads while brick_multiplex is enabled.Due + to that glusterd is not sending volume dictionary for + all volumes to peer. 
+ +Solution: Update the condition in code as well as update test case + also to avoid the issue + +> Change-Id: I06522dbdfee4f7e995d9cc7b7098fdf35340dc52 +> fixes: bz#1711250 +> Cherry pick from commit 4a5fb52eb1c5387a0fb8bfa1253e5227c7c255e8 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22739/ + +BUG: 1711249 +Change-Id: I06522dbdfee4f7e995d9cc7b7098fdf35340dc52 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/171589 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/bug-1699339.t | 16 ++++++++++------ + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + 2 files changed, 11 insertions(+), 7 deletions(-) + +diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t +index 3e950f4..bb8d4f4 100644 +--- a/tests/bugs/glusterd/bug-1699339.t ++++ b/tests/bugs/glusterd/bug-1699339.t +@@ -52,18 +52,22 @@ done + + TEST kill_glusterd 1 + +-vol1=$(printf "%s-vol%02d" $V0 1) ++TESTS_EXPECTED_IN_LOOP=4 ++for i in `seq 1 3 15` ++do ++vol1=$(printf "%s-vol%02d" $V0 $i) + TEST $CLI_2 volume set $vol1 performance.readdir-ahead on +-vol2=$(printf "%s-vol%02d" $V0 2) +-TEST $CLI_2 volume set $vol2 performance.readdir-ahead on ++done + + # Bring back 1st glusterd + TEST $glusterd_1 + EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + ++TESTS_EXPECTED_IN_LOOP=4 ++for i in `seq 1 3 15` ++do ++vol1=$(printf "%s-vol%02d" $V0 $i) + EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead +- +-vol_name=$(printf "%s-vol%02d" $V0 2) +-EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead ++done + + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index efa5a86..8f1525e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3542,7 +3542,7 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + if ((i + 1) != totthread) { + arg->end = ((i + 1) * vol_per_thread_limit); + } else { +- arg->end = ((i * vol_per_thread_limit) + endindex); ++ arg->end = (((i + 1) * vol_per_thread_limit) + endindex); + } + th_ret = gf_thread_create_detached( + &th_id, glusterd_add_bulk_volumes_create_thread, arg, +-- +1.8.3.1 + diff --git a/SOURCES/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch b/SOURCES/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch new file mode 100644 index 0000000..b1a5651 --- /dev/null +++ b/SOURCES/0148-cluster-dht-Lookup-all-files-when-processing-directo.patch @@ -0,0 +1,70 @@ +From 92aadb6a5eeec75edf7f5a11a0ebd861dd85ca6b Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Mon, 20 May 2019 15:23:42 +0530 +Subject: [PATCH 148/169] cluster/dht: Lookup all files when processing + directory + +A rebalance process currently only looks up files +that it is supposed to migrate. This could cause issues +when lookup-optimize is enabled as the dir layout can be +updated with the commit hash before all files are looked up. +This is expecially problematic if one of the rebalance processes +fails to complete as clients will try to access files whose +linkto files might not have been created. +Each process will now lookup every file in the directory it is +processing. +Pros: Less likely that files will be inaccessible. +Cons: More lookup requests sent to the bricks and a potential +performance hit. 
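Schematically, the per-entry flow changes as follows (hypothetical helper names, not the real dht API):

    struct entry;
    void lookup_entry(struct entry *e);
    int should_i_migrate(struct entry *e);
    void migrate_entry(struct entry *e);

    static void process_entry(struct entry *e)
    {
        lookup_entry(e);      /* now unconditional: every file is looked
                                 up, so linkto files get created */
        if (!should_i_migrate(e))
            return;           /* only the migration itself is skipped */
        migrate_entry(e);
    }
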
+Note: this does not handle races such as when a layout is updated on disk +just as the create fop is sent by the client. + +upstream : https://review.gluster.org/#/c/glusterfs/+/22746/ + +>Change-Id: I22b55846effc08d3b827c3af9335229335f67fb8 +>fixes: bz#1711764 + +BUG#1714124 + +Change-Id: Ica6a9459befe53957f080001a2dda525b3b14d1c +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172080 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index efbe8a4..559f046 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -2741,12 +2741,6 @@ gf_defrag_migrate_single_file(void *opaque) + goto out; + } + +- if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, +- entry->d_stat.ia_gfid)) { +- gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); +- goto out; +- } +- + gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid); + + gf_uuid_copy(entry_loc.pargfid, loc->gfid); +@@ -2772,6 +2766,12 @@ gf_defrag_migrate_single_file(void *opaque) + goto out; + } + ++ if (!gf_defrag_should_i_migrate(this, rebal_entry->local_subvol_index, ++ entry->d_stat.ia_gfid)) { ++ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path); ++ goto out; ++ } ++ + iatt_ptr = &iatt; + + hashed_subvol = dht_subvol_get_hashed(this, &entry_loc); +-- +1.8.3.1 + diff --git a/SOURCES/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch b/SOURCES/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch new file mode 100644 index 0000000..5caf3d4 --- /dev/null +++ b/SOURCES/0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch @@ -0,0 +1,452 @@ +From 86eee7e829bb33cac9b611da511ecbd2f03fab25 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Fri, 17 May 2019 19:26:48 +0530 +Subject: [PATCH 149/169] glusterd: Optimize code to copy dictionary in + handshake code path + +Problem: While high no. 
of volumes are configured around 2000 + glusterd has bottleneck during handshake at the time + of copying dictionary + +Solution: To avoid the bottleneck serialize a dictionary instead + of copying key-value pair one by one + +> Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a +> fixes: bz#1711297 +> Cherry picked from commit f8f09178bb890924a8050b466cc2e7a0a30e35a7 +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22742/) + +BUG: 1711296 +Change-Id: I9fb332f432e4f915bc3af8dcab38bed26bda2b9a +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172255 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + libglusterfs/src/dict.c | 6 +- + libglusterfs/src/glusterfs/dict.h | 6 + + libglusterfs/src/libglusterfs.sym | 1 + + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 27 ++-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 187 +++++++++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +- + xlators/mgmt/glusterd/src/glusterd.h | 5 + + 7 files changed, 194 insertions(+), 41 deletions(-) + +diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c +index 4cd1fcf..6917df9 100644 +--- a/libglusterfs/src/dict.c ++++ b/libglusterfs/src/dict.c +@@ -2799,10 +2799,6 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) + * 4 4 4 <key len> <value len> + */ + +-#define DICT_HDR_LEN 4 +-#define DICT_DATA_HDR_KEY_LEN 4 +-#define DICT_DATA_HDR_VAL_LEN 4 +- + /** + * dict_serialized_length_lk - return the length of serialized dict. This + * procedure has to be called with this->lock held. +@@ -2812,7 +2808,7 @@ dict_rename_key(dict_t *this, char *key, char *replace_key) + * : failure: -errno + */ + +-static int ++int + dict_serialized_length_lk(dict_t *this) + { + int ret = -EINVAL; +diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h +index 52b833f..022f564 100644 +--- a/libglusterfs/src/glusterfs/dict.h ++++ b/libglusterfs/src/glusterfs/dict.h +@@ -91,6 +91,9 @@ typedef struct _data_pair data_pair_t; + #define DICT_MAX_FLAGS 256 + #define DICT_FLAG_SET 1 + #define DICT_FLAG_CLEAR 0 ++#define DICT_HDR_LEN 4 ++#define DICT_DATA_HDR_KEY_LEN 4 ++#define DICT_DATA_HDR_VAL_LEN 4 + + struct _data { + char *data; +@@ -412,4 +415,7 @@ are_dicts_equal(dict_t *one, dict_t *two, + gf_boolean_t (*value_ignore)(char *k)); + int + dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result); ++ ++int ++dict_serialized_length_lk(dict_t *this); + #endif +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index cf5757c..ec474e7 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -405,6 +405,7 @@ dict_rename_key + dict_reset + dict_serialize + dict_serialized_length ++dict_serialized_length_lk + dict_serialize_value_with_delim + dict_set + dict_setn +diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +index 4ec9700..45f8f17 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +@@ -1528,11 +1528,9 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) + + RCU_READ_UNLOCK; + +- ret = glusterd_add_volumes_to_export_dict(&peer_data); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, +- "Unable to add list of volumes " +- "in the peer_data dict for handshake"); ++ peer_data = dict_new(); ++ if 
(!peer_data) { ++ errno = ENOMEM; + goto out; + } + +@@ -1563,10 +1561,23 @@ glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data) + } + } + +- ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val, +- &req.vols.vols_len); +- if (ret) ++ /* Don't add any key-value in peer_data dictionary after call this function ++ */ ++ ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val, ++ &req.vols.vols_len); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, ++ "Unable to add list of volumes " ++ "in the peer_data dict for handshake"); + goto out; ++ } ++ ++ if (!req.vols.vols_len) { ++ ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val, ++ &req.vols.vols_len); ++ if (ret) ++ goto out; ++ } + + ret = glusterd_submit_request( + peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 8f1525e..2bc4836 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3466,11 +3466,118 @@ out: + return NULL; + } + ++int ++glusterd_dict_searialize(dict_t *dict_arr[], int count, int totcount, char *buf) ++{ ++ int i = 0; ++ int32_t keylen = 0; ++ int64_t netword = 0; ++ data_pair_t *pair = NULL; ++ int dict_count = 0; ++ int ret = 0; ++ ++ netword = hton32(totcount); ++ memcpy(buf, &netword, sizeof(netword)); ++ buf += DICT_HDR_LEN; ++ ++ for (i = 0; i < count; i++) { ++ if (dict_arr[i]) { ++ dict_count = dict_arr[i]->count; ++ pair = dict_arr[i]->members_list; ++ while (dict_count) { ++ if (!pair) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, ++ LG_MSG_PAIRS_LESS_THAN_COUNT, ++ "less than count data pairs found!"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (!pair->key) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, ++ "pair->key is null!"); ++ ret = -1; ++ goto out; ++ } ++ ++ keylen = strlen(pair->key); ++ netword = hton32(keylen); ++ memcpy(buf, &netword, sizeof(netword)); ++ buf += DICT_DATA_HDR_KEY_LEN; ++ if (!pair->value) { ++ gf_msg("glusterd", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, ++ "pair->value is null!"); ++ ret = -1; ++ goto out; ++ } ++ ++ netword = hton32(pair->value->len); ++ memcpy(buf, &netword, sizeof(netword)); ++ buf += DICT_DATA_HDR_VAL_LEN; ++ ++ memcpy(buf, pair->key, keylen); ++ buf += keylen; ++ *buf++ = '\0'; ++ ++ if (pair->value->data) { ++ memcpy(buf, pair->value->data, pair->value->len); ++ buf += pair->value->len; ++ } ++ ++ pair = pair->next; ++ dict_count--; ++ } ++ } ++ } ++ ++out: ++ for (i = 0; i < count; i++) { ++ if (dict_arr[i]) ++ dict_unref(dict_arr[i]); ++ } ++ return ret; ++} ++ ++int ++glusterd_dict_arr_serialize(dict_t *dict_arr[], int count, char **buf, ++ u_int *length) ++{ ++ ssize_t len = 0; ++ int i = 0; ++ int totcount = 0; ++ int ret = 0; ++ ++ for (i = 0; i < count; i++) { ++ if (dict_arr[i]) { ++ len += dict_serialized_length_lk(dict_arr[i]); ++ totcount += dict_arr[i]->count; ++ } ++ } ++ ++ // Subtract HDR_LEN except one dictionary ++ len = len - ((count - 1) * DICT_HDR_LEN); ++ ++ *buf = GF_MALLOC(len, gf_common_mt_char); ++ if (*buf == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if (length != NULL) { ++ *length = len; ++ } ++ ++ ret = glusterd_dict_searialize(dict_arr, count, totcount, *buf); ++ ++out: ++ return ret; ++} ++ + int32_t +-glusterd_add_volumes_to_export_dict(dict_t **peer_data) ++glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, ++ u_int *length) + { + int32_t 
ret = -1; +- dict_t *dict = NULL; + dict_t *dict_arr[128] = { + 0, + }; +@@ -3496,10 +3603,6 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + priv = this->private; + GF_ASSERT(priv); + +- dict = dict_new(); +- if (!dict) +- goto out; +- + /* Count the total number of volumes */ + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; + +@@ -3520,14 +3623,15 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; +- ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); ++ ret = glusterd_add_volume_to_dict(volinfo, peer_data, count, ++ "volume"); + if (ret) + goto out; + + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + +- ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, ++ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data, count, + "volume"); + if (ret) + goto out; +@@ -3569,34 +3673,34 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) + + gf_log(this->name, GF_LOG_INFO, + "Finished dictionary popluation in all threads"); +- for (i = 0; i < totthread; i++) { +- dict_copy_with_ref(dict_arr[i], dict); +- dict_unref(dict_arr[i]); +- } +- gf_log(this->name, GF_LOG_INFO, +- "Finished merger of all dictionraies into single one"); + } + +- ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt); ++ ret = dict_set_int32n(peer_data, "count", SLEN("count"), volcnt); + if (ret) + goto out; + +- ctx.dict = dict; ++ ctx.dict = peer_data; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach(priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; +- ret = dict_set_int32n(dict, "global-opt-count", SLEN("global-opt-count"), +- ctx.opt_count); ++ ret = dict_set_int32n(peer_data, "global-opt-count", ++ SLEN("global-opt-count"), ctx.opt_count); + if (ret) + goto out; + +- *peer_data = dict; ++ if (totthread) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Finished merger of all dictionraies into single one"); ++ dict_arr[totthread++] = peer_data; ++ ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length); ++ gf_log(this->name, GF_LOG_INFO, ++ "Serialize dictionary data return is %d", ret); ++ } ++ + out: +- if (ret) +- dict_unref(dict); + + gf_msg_trace(this->name, 0, "Returning %d", ret); + return ret; +@@ -4940,6 +5044,7 @@ glusterd_import_friend_volumes_synctask(void *opaque) + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + dict_t *peer_data = NULL; ++ glusterd_friend_synctask_args_t *arg = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -4947,8 +5052,20 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf = this->private; + GF_ASSERT(conf); + +- peer_data = (dict_t *)opaque; +- GF_ASSERT(peer_data); ++ arg = opaque; ++ if (!arg) ++ goto out; ++ ++ peer_data = dict_new(); ++ if (!peer_data) { ++ goto out; ++ } ++ ++ ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); ++ if (ret) { ++ errno = ENOMEM; ++ goto out; ++ } + + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) +@@ -4980,6 +5097,11 @@ glusterd_import_friend_volumes_synctask(void *opaque) + out: + if (peer_data) + dict_unref(peer_data); ++ if (arg) { ++ if (arg->dict_buf) ++ GF_FREE(arg->dict_buf); ++ GF_FREE(arg); ++ } + + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +@@ -5146,7 +5268,7 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + gf_boolean_t update = _gf_false; + xlator_t *this = NULL; 
+ glusterd_conf_t *priv = NULL; +- dict_t *peer_data_copy = NULL; ++ glusterd_friend_synctask_args_t *arg = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -5188,12 +5310,23 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ +- peer_data_copy = dict_copy_with_ref(peer_data, NULL); +- glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, +- peer_data_copy); ++ arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char); ++ ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf, ++ &arg->dictlen); ++ if (ret < 0) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "dict_serialize failed while handling " ++ " import friend volume request"); ++ goto out; ++ } ++ ++ glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); + } + + out: ++ if (ret && arg) { ++ GF_FREE(arg); ++ } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, + *status); + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 3647c34..6ad8062 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -227,7 +227,8 @@ glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo, + gf_boolean_t construct_real_path); + + int32_t +-glusterd_add_volumes_to_export_dict(dict_t **peer_data); ++glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, ++ u_int *length); + + int32_t + glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 2ea8560..f96bca3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -240,6 +240,11 @@ typedef struct glusterd_add_dict_args { + int end; + } glusterd_add_dict_args_t; + ++typedef struct glusterd_friend_synctask_args { ++ char *dict_buf; ++ u_int dictlen; ++} glusterd_friend_synctask_args_t; ++ + typedef enum gf_brick_status { + GF_BRICK_STOPPED, + GF_BRICK_STARTED, +-- +1.8.3.1 + diff --git a/SOURCES/0150-libglusterfs-define-macros-needed-for-cloudsync.patch b/SOURCES/0150-libglusterfs-define-macros-needed-for-cloudsync.patch new file mode 100644 index 0000000..b5fbef2 --- /dev/null +++ b/SOURCES/0150-libglusterfs-define-macros-needed-for-cloudsync.patch @@ -0,0 +1,38 @@ +From 4c410d99792808b0c8deb601d50c66df19f73ca7 Mon Sep 17 00:00:00 2001 +From: Anuradha Talur <atalur@commvault.com> +Date: Thu, 25 Oct 2018 17:23:10 -0400 +Subject: [PATCH 150/169] libglusterfs: define macros needed for cloudsync + +backport of patch: https://review.gluster.org/#/c/glusterfs/+/21585/ + +> Change-Id: Iec5ce7f17fbf899f881a58cd20c4c967e3b71668 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur <atalur@commvault.com> + +Change-Id: I79e5d955559acdec7cbeb8f35c8482b3b6ff8b0f +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172189 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/glusterfs/glusterfs.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index fb727fc..516b497 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -364,6 +364,10 @@ enum gf_internal_fop_indicator { + } while 
(0) + + #define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size" ++#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size" ++#define GF_CS_NUM_BLOCKS "trusted.glusterfs.cs.num_blocks" ++ ++#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" + + #define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete" + #define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote" +-- +1.8.3.1 + diff --git a/SOURCES/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch b/SOURCES/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch new file mode 100644 index 0000000..d95db3d --- /dev/null +++ b/SOURCES/0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch @@ -0,0 +1,156 @@ +From bffdcce7119f3ed68694df918e504cc241502835 Mon Sep 17 00:00:00 2001 +From: Anuradha Talur <atalur@commvault.com> +Date: Mon, 19 Nov 2018 17:57:18 -0800 +Subject: [PATCH 151/169] mgmt/glusterd: Make changes related to cloudsync + xlator + +1) The placement of cloudsync xlator has been changed +to make it shard xlator's child. If cloudsync has to +work with shard in the graph, it needs to be child of shard. + +backport of: https://review.gluster.org/#/c/glusterfs/+/21681/ + +> Change-Id: Ib55424fdcb7ce8edae9f19b8a6e3d3ba86c1f0c4 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur <atalur@commvault.com> + +Change-Id: I68fd43b2c559cc2d9f05e1ab19784b174233d690 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172190 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/glusterd/check-cloudsync-ancestry.t | 48 +++++++++++++++++++++++++ + tests/volume.rc | 21 +++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 24 ++++++------- + 3 files changed, 81 insertions(+), 12 deletions(-) + create mode 100644 tests/basic/glusterd/check-cloudsync-ancestry.t + +diff --git a/tests/basic/glusterd/check-cloudsync-ancestry.t b/tests/basic/glusterd/check-cloudsync-ancestry.t +new file mode 100644 +index 0000000..ff6ffee +--- /dev/null ++++ b/tests/basic/glusterd/check-cloudsync-ancestry.t +@@ -0,0 +1,48 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# When shard and cloudsync xlators enabled on a volume, shard xlator ++# should be an ancestor of cloudsync. This testcase is to check this condition. 
++ ++cleanup; ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3 ++ ++volfile=$(gluster system:: getwd)"/vols/$V0/trusted-$V0.tcp-fuse.vol" ++ ++#Test that both shard and cloudsync are not loaded ++EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++#Enable shard and cloudsync in that order and check if volfile is correct ++TEST $CLI volume set $V0 shard on ++TEST $CLI volume set $V0 cloudsync on ++ ++#Test that both shard and cloudsync are loaded ++EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync ++ ++#Disable shard and cloudsync ++TEST $CLI volume set $V0 shard off ++TEST $CLI volume set $V0 cloudsync off ++ ++#Test that both shard and cloudsync are not loaded ++EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++#Enable cloudsync and shard in that order and check if volfile is correct ++TEST $CLI volume set $V0 cloudsync on ++TEST $CLI volume set $V0 shard on ++ ++#Test that both shard and cloudsync are loaded ++EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard ++EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync ++ ++EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync ++ ++cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index b326098..a0ea3b8 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -891,3 +891,24 @@ function check_changelog_op { + + $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l + } ++ ++function volgen_check_ancestry { ++ #Returns Y if ancestor_xl is an ancestor of $child_xl according to the volfile ++ local volfile="$1" ++ ++ local child_xl_type="$2" ++ local child_xl="$3" ++ ++ local ancestor_xl_type="$4" ++ local ancestor_xl="$5" ++ ++ child_linenum=$(awk '/type $child_xl_type\/$child_xl/ {print FNR}' $volfile) ++ ancestor_linenum=$(awk '/type $ancestor_xl_type\/$ancestor_xl/ {print FNR}' $volfile) ++ ++ if [ $child_linenum -lt $ancestor_linenum ]; ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 77aa705..8b58d40 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4360,6 +4360,18 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + "tcp", set_dict); + } + ++ ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false); ++ if (ret == -1) ++ goto out; ++ ++ if (ret) { ++ xl = volgen_graph_add(graph, "features/cloudsync", volname); ++ if (!xl) { ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = dict_get_str_boolean(set_dict, "features.shard", _gf_false); + if (ret == -1) + goto out; +@@ -4567,18 +4579,6 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + if (ret) + return -1; + +- ret = dict_get_str_boolean(set_dict, "features.cloudsync", _gf_false); +- if (ret == -1) +- goto out; +- +- if (ret) { +- xl = volgen_graph_add(graph, "features/cloudsync", volname); +- if (!xl) { +- ret = -1; +- goto out; +- } +- } +- + /* if the client is part of 'gfproxyd' server, then we need to keep the + volume name as 
'gfproxyd-<volname>', for better portmapper options */ + subvol = volname; +-- +1.8.3.1 + diff --git a/SOURCES/0152-storage-posix-changes-with-respect-to-cloudsync.patch b/SOURCES/0152-storage-posix-changes-with-respect-to-cloudsync.patch new file mode 100644 index 0000000..1610009 --- /dev/null +++ b/SOURCES/0152-storage-posix-changes-with-respect-to-cloudsync.patch @@ -0,0 +1,403 @@ +From 10e9f850017d58fcd813ccce253784280326f1d0 Mon Sep 17 00:00:00 2001 +From: Anuradha Talur <atalur@commvault.com> +Date: Tue, 20 Nov 2018 13:15:26 -0800 +Subject: [PATCH 152/169] storage/posix: changes with respect to cloudsync + +Main changes include logic to update iatt buf +with file size from extended attributes in posix +rather than having this logic in cloudsync xlator. + +backport of:https://review.gluster.org/#/c/glusterfs/+/21694/ + +> Change-Id: I44f5f8df7a01e496372557fe2f4eff368dbdaa33 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur <atalur@commvault.com> + +Change-Id: I34880d856fb3add4ce88d64021d08d95405fc1c1 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172191 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/storage/posix/src/posix-entry-ops.c | 1 + + xlators/storage/posix/src/posix-helpers.c | 50 +++++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 139 ++++++++++++++++++++++--- + xlators/storage/posix/src/posix.h | 2 + + 4 files changed, 177 insertions(+), 15 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index fbd83c4..b24a052 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -272,6 +272,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + } + } + ++ posix_update_iatt_buf(&buf, -1, real_path, xdata); + if (priv->update_pgfid_nlinks) { + if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) { + MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 37e33a9..d0fd45a 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -3453,3 +3453,53 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) + out: + return ret; + } ++ ++void ++posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) ++{ ++ int ret = 0; ++ char val[4096] = { ++ 0, ++ }; ++ ++ if (!xattr_req) ++ return; ++ ++ if (!(dict_getn(xattr_req, GF_CS_OBJECT_STATUS, ++ strlen(GF_CS_OBJECT_STATUS)))) ++ return; ++ ++ if (fd != -1) { ++ ret = sys_fgetxattr(fd, GF_CS_OBJECT_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_size = atoll(val); ++ } else { ++ /* Safe to assume that the other 2 xattrs are also not set*/ ++ return; ++ } ++ ret = sys_fgetxattr(fd, GF_CS_BLOCK_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blksize = atoll(val); ++ } ++ ret = sys_fgetxattr(fd, GF_CS_NUM_BLOCKS, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blocks = atoll(val); ++ } ++ } else { ++ ret = sys_lgetxattr(loc, GF_CS_OBJECT_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_size = atoll(val); ++ } else { ++ /* Safe to assume that the other 2 xattrs are also not set*/ ++ return; ++ } ++ ret = sys_lgetxattr(loc, GF_CS_BLOCK_SIZE, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blksize = atoll(val); ++ } ++ ret = 
sys_lgetxattr(loc, GF_CS_NUM_BLOCKS, &val, sizeof(val)); ++ if (ret > 0) { ++ buf->ia_blocks = atoll(val); ++ } ++ } ++} +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 7dbbd3d..065fced 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -108,6 +108,63 @@ extern char *marker_xattrs[]; + static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, + NULL}; + ++void ++posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, ++ char *loc) ++{ ++ int ret = 0; ++ uuid_t uuid; ++ ++ if (!(dict_getn(req, GF_CS_OBJECT_STATUS, strlen(GF_CS_OBJECT_STATUS)))) ++ return; ++ ++ if (!(*rsp)) { ++ *rsp = dict_new(); ++ if (!(*rsp)) { ++ return; ++ } ++ } ++ ++ if (fd != -1) { ++ if (dict_getn(req, GF_CS_XATTR_ARCHIVE_UUID, ++ strlen(GF_CS_XATTR_ARCHIVE_UUID))) { ++ ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); ++ if (ret > 0) { ++ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, ++ true); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, ++ "%s: Failed to set " ++ "dictionary value for %s for fd %d", ++ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); ++ } ++ } else { ++ gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", ++ GF_CS_XATTR_ARCHIVE_UUID, fd); ++ } ++ } ++ } else { ++ if (dict_getn(req, GF_CS_XATTR_ARCHIVE_UUID, ++ strlen(GF_CS_XATTR_ARCHIVE_UUID))) { ++ ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); ++ if (ret > 0) { ++ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, ++ true); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, ++ "%s: Failed to set " ++ "dictionary value for %s for loc %s", ++ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc); ++ } ++ } else { ++ gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", ++ GF_CS_XATTR_ARCHIVE_UUID, loc); ++ } ++ } ++ } ++ return; ++} ++ + int32_t + posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +@@ -150,8 +207,11 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + + posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, + &xattr_rsp, _gf_true); ++ ++ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); + } + ++ posix_update_iatt_buf(&buf, -1, real_path, xdata); + op_ret = 0; + + out: +@@ -422,6 +482,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + if (xdata) + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &statpost); ++ posix_update_iatt_buf(&statpre, -1, real_path, xdata); ++ posix_update_iatt_buf(&statpost, -1, real_path, xdata); + op_ret = 0; + + out: +@@ -898,6 +960,7 @@ posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + } + } + ++ posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); + /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. + * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
+ */ +@@ -1366,6 +1429,7 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + } + } + ++ posix_update_iatt_buf(&prebuf, -1, real_path, xdata); + op_ret = sys_truncate(real_path, offset); + if (op_ret == -1) { + op_errno = errno; +@@ -1405,6 +1469,10 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + int32_t _fd = -1; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; ++ struct iatt preop = { ++ 0, ++ }; ++ dict_t *rsp_xdata = NULL; + struct iatt stbuf = { + 0, + }; +@@ -1471,6 +1539,18 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + pfd->flags = flags; + pfd->fd = _fd; + ++ if (xdata) { ++ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); ++ if (op_ret == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, ++ "pre-operation fstat failed on fd=%p", fd); ++ goto out; ++ } ++ ++ posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, ++ &rsp_xdata, _gf_true); ++ } ++ + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, +@@ -1488,7 +1568,7 @@ out: + + SET_TO_OLD_FS_ID(); + +- STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL); ++ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); + + return 0; + } +@@ -1573,6 +1653,7 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + } + } + ++ posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_pread(_fd, iobuf->ptr, size, offset); + if (op_ret == -1) { + op_errno = errno; +@@ -1878,6 +1959,7 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + } + } + ++ posix_update_iatt_buf(&preop, _fd, NULL, xdata); + if (locked && write_append) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; +@@ -2531,10 +2613,8 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + 0, + }; + data_t *tdata = NULL; +- char stime[4096]; +- char sxattr[4096]; ++ char *cs_var = NULL; + gf_cs_obj_state state = -1; +- char remotepath[4096] = {0}; + int i = 0; + int len; + +@@ -2588,10 +2668,11 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + goto unlock; + } + +- sprintf(stime, "%" PRId64, tmp_stbuf.ia_mtime); ++ cs_var = alloca(4096); ++ sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); + + /*TODO: may be should consider nano-second also */ +- if (strncmp(stime, tdata->data, tdata->len) != 0) { ++ if (strncmp(cs_var, tdata->data, tdata->len) > 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "mtime " + "passed is different from seen by file now." +@@ -2601,31 +2682,54 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + goto unlock; + } + +- len = sprintf(sxattr, "%" PRIu64, tmp_stbuf.ia_size); ++ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); + +- ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, sxattr, len, ++ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, + flags); + if (ret) { ++ op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, + ret); ++ goto unlock; ++ } ++ ++ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); ++ ++ ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, ++ flags); ++ if (ret) { + op_errno = errno; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "setxattr failed. 
key %s err %d", GF_CS_NUM_BLOCKS, ret); + goto unlock; + } + ++ len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); ++ ++ ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, ++ flags); ++ if (ret) { ++ op_errno = errno; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret); ++ goto unlock; ++ } ++ ++ memset(cs_var, 0, 4096); + if (loc->path[0] == '/') { + for (i = 1; i < strlen(loc->path); i++) { +- remotepath[i - 1] = loc->path[i]; ++ cs_var[i - 1] = loc->path[i]; + } + +- remotepath[i] = '\0'; +- gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", +- remotepath); ++ cs_var[i] = '\0'; ++ gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); + } + +- ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, remotepath, +- strlen(loc->path), flags); ++ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, ++ strlen(cs_var), flags); + if (ret) { ++ op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "setxattr failed - %s" + " %d", +@@ -2635,13 +2739,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + + ret = sys_truncate(real_path, 0); + if (ret) { ++ op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "truncate failed - %s" + " %d", + GF_CS_OBJECT_SIZE, ret); +- op_errno = errno; + ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); + if (ret) { ++ op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "removexattr " + "failed post processing- %s" +@@ -2659,6 +2764,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + } + unlock: + UNLOCK(&loc->inode->lock); ++ op_ret = ret; + goto out; + } + +@@ -4927,6 +5033,7 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + } + } + ++ posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_ftruncate(_fd, offset); + + if (op_ret == -1) { +@@ -5008,8 +5115,10 @@ posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + } ++ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); + } + ++ posix_update_iatt_buf(&buf, _fd, NULL, xdata); + op_ret = 0; + + out: +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index d5ba08c..1da4d01 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -664,4 +664,6 @@ posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + int + posix_spawn_ctx_janitor_thread(xlator_t *this); + ++void ++posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + #endif /* _POSIX_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0153-features-cloudsync-Added-some-new-functions.patch b/SOURCES/0153-features-cloudsync-Added-some-new-functions.patch new file mode 100644 index 0000000..d28a7a5 --- /dev/null +++ b/SOURCES/0153-features-cloudsync-Added-some-new-functions.patch @@ -0,0 +1,1077 @@ +From 90254e4ae9455fa0a126f83700978a9314eb79ea Mon Sep 17 00:00:00 2001 +From: Anuradha Talur <atalur@commvault.com> +Date: Thu, 29 Nov 2018 12:54:21 -0800 +Subject: [PATCH 153/169] features/cloudsync : Added some new functions + +This patch contains the following changes: +1) Store ID info will now be stored in the inode ctx +2) Added new readv type where read is made directly + from the remote store. This choice is made by + volume set operation. +3) cs_forget() was missing. Added it. 
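+
+A note on change (2): condensed from cs_readv() further down in this
+patch, the dispatch that implements the read choice is just the stub
+selection below (a sketch; error handling omitted):
+
+    if (priv->remote_read)
+        local->stub = fop_readv_stub(frame, cs_resume_remote_readv,
+                                     fd, size, offset, flags, xdata);
+    else
+        local->stub = fop_readv_stub(frame, cs_resume_readv,
+                                     fd, size, offset, flags, xdata);
+
+priv->remote_read comes from the new boolean volume option
+features.cloudsync-remote-read (off by default), registered in
+glusterd_volopt_map at the end of this patch; when the option is off,
+a read on a remote file first downloads it, as before.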
+ +backport of:https://review.gluster.org/#/c/glusterfs/+/21757/ + +> Change-Id: Ie3232b3d7ffb5313a03f011b0553b19793eedfa2 +> fixes: bz#1642168 +> Signed-off-by: Anuradha Talur <atalur@commvault.com> + +Change-Id: I089e5a8c93049cf6bfabf011673796e38e78d7ee +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172192 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/features/cloudsync/src/cloudsync-common.c | 16 + + xlators/features/cloudsync/src/cloudsync-common.h | 35 ++ + xlators/features/cloudsync/src/cloudsync-fops-c.py | 12 +- + .../features/cloudsync/src/cloudsync-mem-types.h | 1 + + xlators/features/cloudsync/src/cloudsync.c | 600 ++++++++++++++++++--- + xlators/features/cloudsync/src/cloudsync.h | 20 + + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 +- + 7 files changed, 597 insertions(+), 94 deletions(-) + +diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c +index aee1f06..445a31b 100644 +--- a/xlators/features/cloudsync/src/cloudsync-common.c ++++ b/xlators/features/cloudsync/src/cloudsync-common.c +@@ -11,6 +11,20 @@ + #include "cloudsync-common.h" + + void ++cs_xattrinfo_wipe(cs_local_t *local) ++{ ++ if (local->xattrinfo.lxattr) { ++ if (local->xattrinfo.lxattr->file_path) ++ GF_FREE(local->xattrinfo.lxattr->file_path); ++ ++ if (local->xattrinfo.lxattr->volname) ++ GF_FREE(local->xattrinfo.lxattr->volname); ++ ++ GF_FREE(local->xattrinfo.lxattr); ++ } ++} ++ ++void + cs_local_wipe(xlator_t *this, cs_local_t *local) + { + if (!local) +@@ -40,5 +54,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local) + if (local->remotepath) + GF_FREE(local->remotepath); + ++ cs_xattrinfo_wipe(local); ++ + mem_put(local); + } +diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h +index 7b3520c..11d2334 100644 +--- a/xlators/features/cloudsync/src/cloudsync-common.h ++++ b/xlators/features/cloudsync/src/cloudsync-common.h +@@ -14,9 +14,23 @@ + #include <glusterfs/call-stub.h> + #include <glusterfs/xlator.h> + #include <glusterfs/syncop.h> ++#include <glusterfs/compat-errno.h> + #include "cloudsync-mem-types.h" + #include "cloudsync-messages.h" + ++typedef struct cs_loc_xattr { ++ char *file_path; ++ uuid_t uuid; ++ uuid_t gfid; ++ char *volname; ++} cs_loc_xattr_t; ++ ++typedef struct cs_size_xattr { ++ uint64_t size; ++ uint64_t blksize; ++ uint64_t blocks; ++} cs_size_xattr_t; ++ + typedef struct cs_local { + loc_t loc; + fd_t *fd; +@@ -34,10 +48,25 @@ typedef struct cs_local { + int call_cnt; + inode_t *inode; + char *remotepath; ++ ++ struct { ++ /* offset, flags and size are the information needed ++ * by read fop for remote read operation. These will be ++ * populated in cloudsync read fop, before being passed ++ * on to the plugin performing remote read. 
++ */ ++ off_t offset; ++ uint32_t flags; ++ size_t size; ++ cs_loc_xattr_t *lxattr; ++ } xattrinfo; ++ + } cs_local_t; + + typedef int (*fop_download_t)(call_frame_t *frame, void *config); + ++typedef int (*fop_remote_read_t)(call_frame_t *, void *); ++ + typedef void *(*store_init)(xlator_t *this); + + typedef int (*store_reconfigure)(xlator_t *this, dict_t *options); +@@ -48,6 +77,7 @@ struct cs_remote_stores { + char *name; /* store name */ + void *config; /* store related information */ + fop_download_t dlfop; /* store specific download function */ ++ fop_remote_read_t rdfop; /* store specific read function */ + store_init init; /* store init to initialize store config */ + store_reconfigure reconfigure; /* reconfigure store config */ + store_fini fini; +@@ -59,11 +89,15 @@ typedef struct cs_private { + struct cs_remote_stores *stores; + gf_boolean_t abortdl; + pthread_spinlock_t lock; ++ gf_boolean_t remote_read; + } cs_private_t; + + void + cs_local_wipe(xlator_t *this, cs_local_t *local); + ++void ++cs_xattrinfo_wipe(cs_local_t *local); ++ + #define CS_STACK_UNWIND(fop, frame, params...) \ + do { \ + cs_local_t *__local = NULL; \ +@@ -90,6 +124,7 @@ cs_local_wipe(xlator_t *this, cs_local_t *local); + + typedef struct store_methods { + int (*fop_download)(call_frame_t *frame, void *config); ++ int (*fop_remote_read)(call_frame_t *, void *); + /* return type should be the store config */ + void *(*fop_init)(xlator_t *this); + int (*fop_reconfigure)(xlator_t *this, dict_t *options); +diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py +index 3122bd3..a7a2201 100755 +--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py ++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py +@@ -137,15 +137,15 @@ cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + } else { + __cs_inode_ctx_update (this, fd->inode, val); + gf_msg (this->name, GF_LOG_INFO, 0, 0, +- " state = %ld", val); ++ " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || + val == GF_CS_DOWNLOADING)) { + gf_msg (this->name, GF_LOG_INFO, 0, + 0, " will repair and download " +- "the file, current state : %ld", +- val); ++ "the file, current state : %" ++ PRIu64, val); + goto repair; + } else { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, +@@ -274,7 +274,7 @@ fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate', + # These are the current actual lists used to generate the code + + # The following list contains fops which are fd based that modifies data +-fd_data_modify_op_fop_template = ['readv', 'writev', 'flush', 'fsync', ++fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync', + 'ftruncate', 'rchecksum', 'fallocate', + 'discard', 'zerofill', 'seek'] + +@@ -284,8 +284,8 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr', + 'getattr'] + + # These fops need a separate implementation +-special_fops = ['readdirp', 'statfs', 'setxattr', 'unlink', 'getxattr', +- 'truncate', 'fstat'] ++special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr', ++ 'truncate', 'fstat', 'readv'] + + def gen_defaults(): + for name in ops: +diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h +index 9e6837a..2203464 100644 +--- a/xlators/features/cloudsync/src/cloudsync-mem-types.h ++++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h +@@ -16,6 +16,7 @@ enum cs_mem_types_ { + gf_cs_mt_cs_private_t = gf_common_mt_end + 1, + 
gf_cs_mt_cs_remote_stores_t, + gf_cs_mt_cs_inode_ctx_t, ++ gf_cs_mt_cs_lxattr_t, + gf_cs_mt_end + }; + #endif /* __CLOUDSYNC_MEM_TYPES_H__ */ +diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c +index fbdcdf7..2240fc3 100644 +--- a/xlators/features/cloudsync/src/cloudsync.c ++++ b/xlators/features/cloudsync/src/cloudsync.c +@@ -16,6 +16,7 @@ + #include <glusterfs/call-stub.h> + #include "cloudsync-autogen-fops.h" + ++#include <string.h> + #include <dlfcn.h> + + void +@@ -72,6 +73,8 @@ cs_init(xlator_t *this) + + this->private = priv; + ++ GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out); ++ + /* temp workaround. Should be configurable through glusterd*/ + per_vol = _gf_true; + +@@ -135,6 +138,18 @@ cs_init(xlator_t *this) + + (void)dlerror(); + ++ if (priv->remote_read) { ++ priv->stores->rdfop = store_methods->fop_remote_read; ++ if (!priv->stores->rdfop) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "failed to get" ++ " read fop %s", ++ dlerror()); ++ ret = -1; ++ goto out; ++ } ++ } ++ + priv->stores->dlfop = store_methods->fop_download; + if (!priv->stores->dlfop) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, +@@ -196,6 +211,22 @@ out: + return ret; + } + ++int ++cs_forget(xlator_t *this, inode_t *inode) ++{ ++ uint64_t ctx_int = 0; ++ cs_inode_ctx_t *ctx = NULL; ++ ++ inode_ctx_del(inode, this, &ctx_int); ++ if (!ctx_int) ++ return 0; ++ ++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int; ++ ++ GF_FREE(ctx); ++ return 0; ++} ++ + void + cs_fini(xlator_t *this) + { +@@ -217,6 +248,9 @@ cs_reconfigure(xlator_t *this, dict_t *options) + goto out; + } + ++ GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool, ++ out); ++ + /* needed only for per volume configuration*/ + ret = priv->stores->reconfigure(this, options); + +@@ -242,59 +276,6 @@ out: + } + + int32_t +-cs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, +- dict_t *xdata) +-{ +- gf_dirent_t *tmp = NULL; +- char *sxattr = NULL; +- uint64_t ia_size = 0; +- int ret = 0; +- +- list_for_each_entry(tmp, &entries->list, list) +- { +- ret = dict_get_str(tmp->dict, GF_CS_OBJECT_SIZE, &sxattr); +- if (ret) { +- gf_msg_trace(this->name, 0, "size xattr found"); +- continue; +- } +- +- ia_size = atoll(sxattr); +- tmp->d_stat.ia_size = ia_size; +- } +- +- STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); +- return 0; +-} +- +-int32_t +-cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t off, dict_t *xdata) +-{ +- int ret = 0; +- int op_errno = ENOMEM; +- +- if (!xdata) { +- xdata = dict_new(); +- if (!xdata) { +- goto err; +- } +- } +- +- ret = dict_set_int32(xdata, GF_CS_OBJECT_SIZE, 1); +- if (ret) { +- goto err; +- } +- +- STACK_WIND(frame, cs_readdirp_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); +- return 0; +-err: +- STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL); +- return 0; +-} +- +-int32_t + cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +@@ -305,7 +286,6 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + local = frame->local; + +- /* Do we need lock here? 
*/ + local->call_cnt++; + + if (op_ret == -1) { +@@ -320,13 +300,13 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto unwind; + } else { + __cs_inode_ctx_update(this, local->loc.inode, val); +- gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %ld", val); ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); + + if (local->call_cnt == 1 && + (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { + gf_msg(this->name, GF_LOG_WARNING, 0, 0, + "will repair and download " +- "the file, current state : %ld", ++ "the file, current state : %" PRIu64, + val); + goto repair; + } else { +@@ -665,7 +645,7 @@ cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + if (op_ret == 0) { + ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); + if (!ret) { +- gf_msg_debug(this->name, 0, "state %ld", val); ++ gf_msg_debug(this->name, 0, "state %" PRIu64, val); + ret = __cs_inode_ctx_update(this, fd->inode, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); +@@ -831,7 +811,7 @@ out: + return 0; + } + +-void * ++int + cs_download_task(void *arg) + { + call_frame_t *frame = NULL; +@@ -842,7 +822,6 @@ cs_download_task(void *arg) + fd_t *fd = NULL; + cs_local_t *local = NULL; + dict_t *dict = NULL; +- int *retval = NULL; + + frame = (call_frame_t *)arg; + +@@ -850,13 +829,6 @@ cs_download_task(void *arg) + + priv = this->private; + +- retval = GF_CALLOC(1, sizeof(int), gf_common_mt_int); +- if (!retval) { +- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); +- ret = -1; +- goto out; +- } +- + if (!priv->stores) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "No remote store " +@@ -972,20 +944,13 @@ out: + local->dlfd = NULL; + } + +- if (retval) { +- *retval = ret; +- pthread_exit(retval); +- } else { +- pthread_exit(&ret); +- } ++ return ret; + } + + int + cs_download(call_frame_t *frame) + { +- int *retval = NULL; + int ret = 0; +- pthread_t dthread; + cs_local_t *local = NULL; + xlator_t *this = NULL; + +@@ -1000,16 +965,406 @@ cs_download(call_frame_t *frame) + goto out; + } + +- ret = gf_thread_create(&dthread, NULL, &cs_download_task, (void *)frame, +- "downloadthread"); ++ ret = cs_download_task((void *)frame); ++out: ++ return ret; ++} + +- pthread_join(dthread, (void **)&retval); ++int ++cs_set_xattr_req(call_frame_t *frame) ++{ ++ cs_local_t *local = NULL; ++ GF_UNUSED int ret = 0; ++ ++ local = frame->local; ++ ++ /* When remote reads are performed (i.e. reads on remote store), ++ * there needs to be a way to associate a file on gluster volume ++ * with its correspnding file on the remote store. In order to do ++ * that, a unique key can be maintained as an xattr ++ * (GF_CS_XATTR_ARCHIVE_UUID)on the stub file on gluster bricks. ++ * This xattr should be provided to the plugin to ++ * perform the read fop on the correct file. This assumes that the file ++ * hierarchy and name need not be the same on remote store as that of ++ * the gluster volume. 
++ */ ++ ret = dict_set_str(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID, "1"); ++ ++ return 0; ++} + +- ret = *retval; ++int ++cs_update_xattrs(call_frame_t *frame, dict_t *xdata) ++{ ++ cs_local_t *local = NULL; ++ xlator_t *this = NULL; ++ int size = -1; ++ GF_UNUSED int ret = 0; ++ ++ local = frame->local; ++ this = frame->this; ++ ++ local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t), ++ gf_cs_mt_cs_lxattr_t); ++ if (!local->xattrinfo.lxattr) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid); ++ ++ if (local->remotepath) { ++ local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath); ++ if (!local->xattrinfo.lxattr->file_path) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ } ++ ++ ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID, ++ &(local->xattrinfo.lxattr->uuid)); ++ ++ if (ret) { ++ gf_uuid_clear(local->xattrinfo.lxattr->uuid); ++ } ++ size = strlen(this->name) - strlen("-cloudsync") + 1; ++ local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char); ++ if (!local->xattrinfo.lxattr->volname) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1); ++ local->xattrinfo.lxattr->volname[size - 1] = '\0'; ++ ++ return 0; ++err: ++ cs_xattrinfo_wipe(local); ++ return -1; ++} ++ ++int ++cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags) ++{ ++ xlator_t *this = NULL; ++ cs_private_t *priv = NULL; ++ int ret = -1; ++ fd_t *fd = NULL; ++ cs_local_t *local = NULL; ++ ++ local = frame->local; ++ this = frame->this; ++ priv = this->private; ++ ++ if (!local->remotepath) { ++ ret = -1; ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "remote path not" ++ " available. 
Check posix logs to resolve"); ++ goto out; ++ } ++ ++ if (!priv->stores) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "No remote store " ++ "plugins found"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (local->fd) { ++ fd = fd_anonymous(local->fd->inode); ++ } else { ++ fd = fd_anonymous(local->loc.inode); ++ } ++ ++ local->xattrinfo.size = size; ++ local->xattrinfo.offset = offset; ++ local->xattrinfo.flags = flags; ++ ++ if (!fd) { ++ gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed"); ++ ret = -1; ++ goto out; ++ } ++ ++ local->dlfd = fd; ++ local->dloffset = offset; ++ ++ /*this calling method is for per volume setting */ ++ ret = priv->stores->rdfop(frame, priv->stores->config); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "read failed" ++ ", remotepath: %s", ++ local->remotepath); ++ ret = -1; ++ goto out; ++ } else { ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ "read success, path" ++ " : %s", ++ local->remotepath); ++ } + + out: +- if (retval) +- GF_FREE(retval); ++ if (fd) { ++ fd_unref(fd); ++ local->dlfd = NULL; ++ } ++ return ret; ++} ++ ++int32_t ++cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iovec *vector, int32_t count, ++ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) ++{ ++ cs_local_t *local = NULL; ++ int ret = 0; ++ uint64_t val = 0; ++ fd_t *fd = NULL; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ local->call_cnt++; ++ ++ if (op_ret == -1) { ++ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val); ++ if (ret == 0) { ++ if (val == GF_CS_ERROR) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "could not get file state, unwinding"); ++ op_ret = -1; ++ op_errno = EIO; ++ goto unwind; ++ } else { ++ __cs_inode_ctx_update(this, fd->inode, val); ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val); ++ ++ if (local->call_cnt == 1 && ++ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) { ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ " will read from remote : %" PRIu64, val); ++ goto repair; ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "second readv, Unwinding"); ++ goto unwind; ++ } ++ } ++ } else { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "file state " ++ "could not be figured, unwinding"); ++ goto unwind; ++ } ++ } else { ++ /* successful readv => file is local */ ++ __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL); ++ gf_msg(this->name, GF_LOG_INFO, 0, 0, ++ "state : GF_CS_LOCAL" ++ ", readv successful"); ++ ++ goto unwind; ++ } ++ ++repair: ++ ret = locate_and_execute(frame); ++ if (ret) { ++ goto unwind; ++ } ++ ++ return 0; ++ ++unwind: ++ CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf, ++ iobref, xdata); ++ ++ return 0; ++} ++ ++int32_t ++cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int ret = 0; ++ ++ ret = cs_resume_postprocess(this, frame, fd->inode); ++ if (ret) { ++ goto unwind; ++ } ++ ++ cs_inodelk_unlock(frame); ++ ++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); ++ ++ return 0; ++ ++unwind: ++ cs_inodelk_unlock(frame); ++ ++ cs_common_cbk(frame); ++ ++ return 0; ++} ++ ++int32_t ++cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ size_t size, off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int ret = 0; ++ cs_local_t *local = NULL; ++ gf_cs_obj_state state = -1; ++ cs_inode_ctx_t *ctx = NULL; ++ ++ cs_inodelk_unlock(frame); ++ ++ 
local = frame->local; ++ if (!local) { ++ ret = -1; ++ goto unwind; ++ } ++ ++ __cs_inode_ctx_get(this, fd->inode, &ctx); ++ ++ state = __cs_get_file_state(this, fd->inode, ctx); ++ if (state == GF_CS_ERROR) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "status is GF_CS_ERROR." ++ " Aborting readv"); ++ local->op_ret = -1; ++ local->op_errno = EREMOTE; ++ ret = -1; ++ goto unwind; ++ } ++ ++ /* Serve readv from remote store only if it is remote. */ ++ gf_msg_debug(this->name, 0, "status of file %s is %d", ++ local->remotepath ? local->remotepath : "", state); ++ ++ /* We will reach this condition if local inode ctx had REMOTE ++ * state when the control was in cs_readv but after stat ++ * we got an updated state saying that the file is LOCAL. ++ */ ++ if (state == GF_CS_LOCAL) { ++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, ++ xdata); ++ } else if (state == GF_CS_REMOTE) { ++ ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset, ++ size, flags); ++ /* Failed to submit the remote readv fop to plugin */ ++ if (ret) { ++ local->op_ret = -1; ++ local->op_errno = EREMOTE; ++ goto unwind; ++ } ++ /* When the file is in any other intermediate state, ++ * we should not perform remote reads. ++ */ ++ } else { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ ++ return 0; ++ ++unwind: ++ cs_common_cbk(frame); ++ ++ return 0; ++} ++ ++int32_t ++cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata) ++{ ++ int op_errno = -1; ++ cs_local_t *local = NULL; ++ int ret = 0; ++ cs_inode_ctx_t *ctx = NULL; ++ gf_cs_obj_state state = -1; ++ cs_private_t *priv = NULL; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(fd, err); ++ ++ priv = this->private; ++ ++ local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed"); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ __cs_inode_ctx_get(this, fd->inode, &ctx); ++ ++ if (ctx) ++ state = __cs_get_file_state(this, fd->inode, ctx); ++ else ++ state = GF_CS_LOCAL; ++ ++ local->xattr_req = xdata ? 
dict_ref(xdata) : (xdata = dict_new()); ++ ++ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "dict_set failed key:" ++ " %s", ++ GF_CS_OBJECT_STATUS); ++ goto err; ++ } ++ ++ if (priv->remote_read) { ++ local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size, ++ offset, flags, xdata); ++ } else { ++ local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset, ++ flags, xdata); ++ } ++ if (!local->stub) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory"); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ if (state == GF_CS_LOCAL) { ++ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, ++ xdata); ++ } else { ++ local->call_cnt++; ++ ret = locate_and_execute(frame); ++ if (ret) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL); ++ ++ return 0; ++} ++ ++int ++cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, ++ inode_t *inode, off_t offset, size_t size, ++ uint32_t flags) ++{ ++ int ret = 0; ++ ++ ret = cs_serve_readv(frame, offset, size, flags); + + return ret; + } +@@ -1059,7 +1414,7 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + goto err; + } else { + ret = __cs_inode_ctx_update(this, inode, val); +- gf_msg_debug(this->name, 0, "status : %lu", val); ++ gf_msg_debug(this->name, 0, "status : %" PRIu64, val); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed"); + local->op_ret = -1; +@@ -1087,6 +1442,10 @@ cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + gf_msg_debug(this->name, 0, "NULL filepath"); + } + ++ ret = cs_update_xattrs(frame, xdata); ++ if (ret) ++ goto err; ++ + local->op_ret = 0; + local->xattr_rsp = dict_ref(xdata); + memcpy(&local->stbuf, stbuf, sizeof(struct iatt)); +@@ -1121,6 +1480,8 @@ cs_do_stat_check(call_frame_t *main_frame) + goto err; + } + ++ cs_set_xattr_req(main_frame); ++ + if (local->fd) { + STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req); +@@ -1177,6 +1538,10 @@ cs_common_cbk(call_frame_t *frame) + NULL, NULL, NULL); + break; + ++ case GF_FOP_TRUNCATE: ++ CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ NULL, NULL, NULL); ++ break; + default: + break; + } +@@ -1427,7 +1792,7 @@ __cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx) + if (ret) + *ctx = NULL; + else +- *ctx = (cs_inode_ctx_t *)ctxint; ++ *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + return; + } +@@ -1452,7 +1817,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val) + + ctx->state = val; + +- ctxint = (uint64_t)ctx; ++ ctxint = (uint64_t)(uintptr_t)ctx; + + ret = __inode_ctx_set(inode, this, &ctxint); + if (ret) { +@@ -1460,7 +1825,7 @@ __cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val) + goto out; + } + } else { +- ctx = (cs_inode_ctx_t *)ctxint; ++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + ctx->state = val; + } +@@ -1483,7 +1848,7 @@ cs_inode_ctx_reset(xlator_t *this, inode_t *inode) + return 0; + } + +- ctx = (cs_inode_ctx_t *)ctxint; ++ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint; + + GF_FREE(ctx); + return 0; +@@ -1532,6 +1897,57 @@ cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode) + out: + return ret; + } ++ ++int32_t 
++__cs_get_dict_str(char **str, dict_t *xattr, const char *name, int *errnum) ++{ ++ data_t *data = NULL; ++ int ret = -1; ++ ++ assert(str != NULL); ++ ++ data = dict_get(xattr, (char *)name); ++ if (!data) { ++ *errnum = ENODATA; ++ goto out; ++ } ++ ++ *str = GF_CALLOC(data->len + 1, sizeof(char), gf_common_mt_char); ++ if (!(*str)) { ++ *errnum = ENOMEM; ++ goto out; ++ } ++ ++ memcpy(*str, data->data, sizeof(char) * (data->len)); ++ return 0; ++ ++out: ++ return ret; ++} ++ ++int32_t ++__cs_get_dict_uuid(uuid_t uuid, dict_t *xattr, const char *name, int *errnum) ++{ ++ data_t *data = NULL; ++ int ret = -1; ++ ++ assert(uuid != NULL); ++ ++ data = dict_get(xattr, (char *)name); ++ if (!data) { ++ *errnum = ENODATA; ++ goto out; ++ } ++ ++ assert(data->len == sizeof(uuid_t)); ++ ++ gf_uuid_copy(uuid, (unsigned char *)data->data); ++ return 0; ++ ++out: ++ return ret; ++} ++ + int32_t + cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict) + { +@@ -1606,7 +2022,6 @@ cs_notify(xlator_t *this, int event, void *data, ...) + + struct xlator_fops cs_fops = { + .stat = cs_stat, +- .readdirp = cs_readdirp, + .truncate = cs_truncate, + .seek = cs_seek, + .statfs = cs_statfs, +@@ -1627,7 +2042,9 @@ struct xlator_fops cs_fops = { + .zerofill = cs_zerofill, + }; + +-struct xlator_cbks cs_cbks = {}; ++struct xlator_cbks cs_cbks = { ++ .forget = cs_forget, ++}; + + struct xlator_dumpops cs_dumpops = { + .fdctx_to_dict = cs_fdctx_to_dict, +@@ -1647,6 +2064,15 @@ struct volume_options cs_options[] = { + {.key = {"cloudsync-storetype"}, + .type = GF_OPTION_TYPE_STR, + .description = "Defines which remote store is enabled"}, ++ {.key = {"cloudsync-remote-read"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .description = "Defines a remote read fop when on"}, ++ {.key = {"cloudsync-store-id"}, ++ .type = GF_OPTION_TYPE_STR, ++ .description = "Defines a volume wide store id"}, ++ {.key = {"cloudsync-product-id"}, ++ .type = GF_OPTION_TYPE_STR, ++ .description = "Defines a volume wide product id"}, + {.key = {NULL}}, + }; + +diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h +index dbdb207..0cb800a 100644 +--- a/xlators/features/cloudsync/src/cloudsync.h ++++ b/xlators/features/cloudsync/src/cloudsync.h +@@ -19,6 +19,7 @@ + #include "cloudsync-common.h" + #include "cloudsync-autogen-fops.h" + ++#define ALIGN_SIZE 4096 + #define CS_LOCK_DOMAIN "cs.protect.file.stat" + typedef struct cs_dlstore { + off_t off; +@@ -29,6 +30,7 @@ typedef struct cs_dlstore { + } cs_dlstore; + + typedef struct cs_inode_ctx { ++ cs_loc_xattr_t locxattr; + gf_cs_obj_state state; + } cs_inode_ctx_t; + +@@ -100,4 +102,22 @@ cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t + cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xattr_req); ++ ++int32_t ++cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iovec *vector, int32_t count, ++ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata); ++int32_t ++cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata); ++int32_t ++cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t offset, uint32_t flags, dict_t *xdata); ++ ++int ++cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame, ++ inode_t *inode, off_t offset, size_t size, ++ uint32_t flags); ++int ++cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, 
uint32_t flags); + #endif /* __CLOUDSYNC_H__ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 4b32fb6..73abf37 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3693,7 +3693,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_5_0, + .description = "enable/disable noatime option with ctime enabled.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, +- {.key = "feature.cloudsync-storetype", ++ {.key = "features.cloudsync-storetype", + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_5_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, +@@ -3721,4 +3721,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .validate_fn = validate_boolean, + .description = "option to enforce mandatory lock on a file", + .flags = VOLOPT_FLAG_XLATOR_OPT}, ++ {.key = "features.cloudsync-remote-read", ++ .voltype = "features/cloudsync", ++ .value = "off", ++ .op_version = GD_OP_VERSION_6_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = NULL}}; +-- +1.8.3.1 + diff --git a/SOURCES/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch b/SOURCES/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch new file mode 100644 index 0000000..6068678 --- /dev/null +++ b/SOURCES/0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch @@ -0,0 +1,1394 @@ +From b402b89f71a3ebabca24c459f106af1f9610939a Mon Sep 17 00:00:00 2001 +From: Anuradha Talur <atalur@commvault.com> +Date: Fri, 30 Nov 2018 11:23:07 -0800 +Subject: [PATCH 154/169] cloudsync/cvlt: Cloudsync plugin for commvault store + +backport of: https://review.gluster.org/#/c/glusterfs/+/21771/ + +> Change-Id: Icbe53e78e9c4f6699c7a26a806ef4b14b39f5019 +> updates: bz#1642168 +> Signed-off-by: Anuradha Talur <atalur@commvault.com> + +Change-Id: Ib543605daa51fa1cfe77ed475390a30ef14e6452 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172194 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + configure.ac | 13 + + glusterfs.spec.in | 1 + + libglusterfs/src/glusterfs/glfs-message-id.h | 1 + + .../src/cloudsync-plugins/src/Makefile.am | 6 +- + .../src/cloudsync-plugins/src/cvlt/Makefile.am | 3 + + .../src/cloudsync-plugins/src/cvlt/src/Makefile.am | 12 + + .../cloudsync-plugins/src/cvlt/src/archivestore.h | 203 +++++ + .../cloudsync-plugins/src/cvlt/src/cvlt-messages.h | 30 + + .../src/cvlt/src/libcloudsynccvlt.sym | 1 + + .../src/cvlt/src/libcvlt-mem-types.h | 19 + + .../src/cloudsync-plugins/src/cvlt/src/libcvlt.c | 842 +++++++++++++++++++++ + .../src/cloudsync-plugins/src/cvlt/src/libcvlt.h | 84 ++ + xlators/features/cloudsync/src/cloudsync.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 +- + 14 files changed, 1228 insertions(+), 3 deletions(-) + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h + create mode 
100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c + create mode 100644 xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h + +diff --git a/configure.ac b/configure.ac +index 0e11d4c..f597b86 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -170,6 +170,8 @@ AC_CONFIG_FILES([Makefile + xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile + xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile + xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile ++ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile ++ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile + xlators/playground/Makefile + xlators/playground/template/Makefile + xlators/playground/template/src/Makefile +@@ -937,6 +939,17 @@ AM_CONDITIONAL([BUILD_AMAZONS3_PLUGIN], [test "x$HAVE_AMAZONS3" = "xyes"]) + if test "x$HAVE_AMAZONS3" = "xyes";then + BUILD_CLOUDSYNC="yes" + fi ++BUILD_CVLT_PLUGIN="no" ++case $host_os in ++#enable cvlt plugin only for linux platforms ++ linux*) ++ BUILD_CVLT_PLUGIN="yes" ++ BUILD_CLOUDSYNC="yes" ++ ;; ++ *) ++ ;; ++esac ++AM_CONDITIONAL([BUILD_CVLT_PLUGIN], [test "x$BUILD_CVLT_PLUGIN" = "xyes"]) + AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"]) + dnl end cloudsync section + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ed58356..85e75f2 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1199,6 +1199,7 @@ exit 0 + %files cloudsync-plugins + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins + %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so ++ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so + + %files devel + %dir %{_includedir}/glusterfs +diff --git a/libglusterfs/src/glusterfs/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h +index 001f4ab..a1a16ca 100644 +--- a/libglusterfs/src/glusterfs/glfs-message-id.h ++++ b/libglusterfs/src/glusterfs/glfs-message-id.h +@@ -93,6 +93,7 @@ enum _msgid_comp { + GLFS_MSGID_COMP(TEMPLATE, 1), + GLFS_MSGID_COMP(UTIME, 1), + GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1), ++ GLFS_MSGID_COMP(CVLT, 1), + /* --- new segments for messages goes above this line --- */ + + GLFS_MSGID_END +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am +index 4deefb6..fb6b058 100644 +--- a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am +@@ -2,6 +2,10 @@ if BUILD_AMAZONS3_PLUGIN + AMAZONS3_DIR = cloudsyncs3 + endif + +-SUBDIRS = ${AMAZONS3_DIR} ++if BUILD_CVLT_PLUGIN ++ CVLT_DIR = cvlt ++endif ++ ++SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR} + + CLEANFILES = +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am +new file mode 100644 +index 0000000..a985f42 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am +@@ -0,0 +1,3 @@ ++SUBDIRS = src ++ ++CLEANFILES = +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am +new file mode 100644 +index 0000000..b512464 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am +@@ -0,0 +1,12 @@ ++csp_LTLIBRARIES = cloudsynccvlt.la ++cspdir 
= $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins ++ ++cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c ++cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la ++cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym ++AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src ++noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h ++AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src ++CLEANFILES = ++ ++EXTRA_DIST = libcloudsynccvlt.sym +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h +new file mode 100644 +index 0000000..7230ef7 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h +@@ -0,0 +1,203 @@ ++/* ++ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __ARCHIVESTORE_H__ ++#define __ARCHIVESTORE_H__ ++ ++#include <stdlib.h> ++#include <stddef.h> ++#include <stdint.h> ++#include <dlfcn.h> ++#include <uuid/uuid.h> ++ ++#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid" ++#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id" ++#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id" ++ ++struct _archstore_methods; ++typedef struct _archstore_methods archstore_methods_t; ++ ++struct _archstore_desc { ++ void *priv; /* Private field for store mgmt. */ ++ /* To be used only by archive store*/ ++}; ++typedef struct _archstore_desc archstore_desc_t; ++ ++struct _archstore_info { ++ char *id; /* Identifier for the archivestore */ ++ uint32_t idlen; /* Length of identifier string */ ++ char *prod; /* Name of the data mgmt. product */ ++ uint32_t prodlen; /* Length of the product string */ ++}; ++typedef struct _archstore_info archstore_info_t; ++ ++struct _archstore_fileinfo { ++ uuid_t uuid; /* uuid of the file */ ++ char *path; /* file path */ ++ uint32_t pathlength; /* length of file path */ ++}; ++typedef struct _archstore_fileinfo archstore_fileinfo_t; ++ ++struct _app_callback_info { ++ archstore_info_t *src_archstore; ++ archstore_fileinfo_t *src_archfile; ++ archstore_info_t *dest_archstore; ++ archstore_fileinfo_t *dest_archfile; ++}; ++typedef struct _app_callback_info app_callback_info_t; ++ ++typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *, ++ void *, int64_t, int32_t); ++ ++enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 }; ++typedef enum _archstore_scan_type archstore_scan_type_t; ++ ++typedef int32_t archstore_errno_t; ++ ++/* ++ * Initialize archive store. ++ * arg1 pointer to structure containing archive store information ++ * arg2 error number if any generated during the initialization ++ * arg3 name of the log file ++ */ ++typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *, ++ const char *); ++ ++/* ++ * Clean up archive store. 
++ * arg1 pointer to structure containing archive store information ++ * arg2 error number if any generated during the cleanup ++ */ ++typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *); ++ ++/* ++ * Read the contents of the file from archive store ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing archive store information ++ * arg3 pointer to structure containing information about file to be read ++ * arg4 offset in the file from which data should be read ++ * arg5 buffer where the data should be read ++ * arg6 number of bytes of data to be read ++ * arg7 error number if any generated during the read from file ++ * arg8 callback handler to be invoked after the data is read ++ * arg9 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, off_t, char *, ++ size_t, archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Restore the contents of the file from archive store ++ * This is basically in-place restore ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing archive store information ++ * arg3 pointer to structure containing information about file to be restored ++ * arg4 error number if any generated during the file restore ++ * arg5 callback to be invoked after the file is restored ++ * arg6 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Restore the contents of the file from archive store to a different store ++ * This is basically out-of-place restore ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing source archive store information ++ * arg3 pointer to structure containing information about file to be restored ++ * arg4 pointer to structure containing destination archive store information ++ * arg5 pointer to structure containing information about the location to ++ which the file will be restored ++ * arg6 error number if any generated during the file restore ++ * arg7 callback to be invoked after the file is restored ++ * arg8 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Archive the contents of the file to archive store ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing source archive store information ++ * arg3 pointer to structure containing information about files to be archived ++ * arg4 pointer to structure containing destination archive store information ++ * arg5 pointer to structure containing information about files that failed ++ * to be archived ++ * arg6 error number if any generated during the file archival ++ * arg7 callback to be invoked after the file is archived ++ * arg8 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Backup list of files provided in the input file ++ * arg1 pointer 
to structure containing archive store description ++ * arg2 pointer to structure containing source archive store information ++ * arg3 pointer to structure containing information about files to be backed up ++ * arg4 pointer to structure containing destination archive store information ++ * arg5 pointer to structure containing information about files that failed ++ * to be backed up ++ * arg6 error number if any generated during the file archival ++ * arg7 callback to be invoked after the file is archived ++ * arg8 cookie to be passed when callback is invoked ++ */ ++typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_info_t *, ++ archstore_fileinfo_t *, ++ archstore_errno_t *, app_callback_t, ++ void *); ++ ++/* ++ * Scan the contents of a store and determine the files which need to be ++ * backed up. ++ * arg1 pointer to structure containing archive store description ++ * arg2 pointer to structure containing archive store information ++ * arg3 type of scan whether full or incremental ++ * arg4 path to file that contains list of files to be backed up ++ * arg5 error number if any generated during scan operation ++ */ ++typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *, ++ archstore_scan_type_t, char *, ++ archstore_errno_t *); ++ ++struct _archstore_methods { ++ init_archstore_t init; ++ term_archstore_t fini; ++ backup_archstore_t backup; ++ archive_archstore_t archive; ++ scan_archstore_t scan; ++ restore_archstore_t restore; ++ recall_archstore_t recall; ++ read_archstore_t read; ++}; ++ ++typedef int (*get_archstore_methods_t)(archstore_methods_t *); ++ ++/* ++ * Single function that will be invoked by applications for extracting ++ * the function pointers to all data management functions. ++ */ ++int32_t ++get_archstore_methods(archstore_methods_t *); ++ ++#endif /* End of __ARCHIVESTORE_H__ */ +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h +new file mode 100644 +index 0000000..57c9aa7 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h +@@ -0,0 +1,30 @@ ++/* ++ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++ */ ++ ++#ifndef _CVLT_MESSAGES_H_ ++#define _CVLT_MESSAGES_H_ ++ ++#include <glusterfs/glfs-message-id.h> ++ ++/* To add new message IDs, append new identifiers at the end of the list. ++ * ++ * Never remove a message ID. If it's not used anymore, you can rename it or ++ * leave it as it is, but not delete it. This is to prevent reutilization of ++ * IDs by other messages. ++ * ++ * The component name must match one of the entries defined in ++ * glfs-message-id.h. 
++ */ ++ ++GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE, ++ CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED, ++ CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED); ++ ++#endif /* !_CVLT_MESSAGES_H_ */ +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym +new file mode 100644 +index 0000000..0bc2736 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym +@@ -0,0 +1 @@ ++store_ops +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h +new file mode 100644 +index 0000000..c24fab8 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h +@@ -0,0 +1,19 @@ ++/* ++ * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> ++ * This file is part of GlusterFS. ++ * ++ * This file is licensed to you under your choice of the GNU Lesser ++ * General Public License, version 3 or any later version (LGPLv3 or ++ * later), or the GNU General Public License, version 2 (GPLv2), in all ++ * cases as published by the Free Software Foundation. ++ */ ++ ++#ifndef __LIBCVLT_MEM_TYPES_H__ ++#define __LIBCVLT_MEM_TYPES_H__ ++ ++#include <glusterfs/mem-types.h> ++enum libcvlt_mem_types_ { ++ gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1, ++ gf_libcvlt_mt_end ++}; ++#endif /* __LIBCVLT_MEM_TYPES_H__ */ +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c +new file mode 100644 +index 0000000..e827882 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c +@@ -0,0 +1,842 @@ ++#include <stdlib.h> ++#include <glusterfs/xlator.h> ++#include <glusterfs/glusterfs.h> ++#include "libcvlt.h" ++#include "cloudsync-common.h" ++#include "cvlt-messages.h" ++ ++#define LIBARCHIVE_SO "libopenarchive.so" ++#define ALIGN_SIZE 4096 ++#define CVLT_TRAILER "cvltv1" ++ ++store_methods_t store_ops = { ++ .fop_download = cvlt_download, ++ .fop_init = cvlt_init, ++ .fop_reconfigure = cvlt_reconfigure, ++ .fop_fini = cvlt_fini, ++ .fop_remote_read = cvlt_read, ++}; ++ ++static const int32_t num_req = 32; ++static const int32_t num_iatt = 32; ++static char *plugin = "cvlt_cloudSync"; ++ ++int32_t ++mem_acct_init(xlator_t *this) ++{ ++ int ret = -1; ++ ++ if (!this) ++ return ret; ++ ++ ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1); ++ ++ if (ret != 0) { ++ return ret; ++ } ++ ++ return ret; ++} ++ ++static void ++cvlt_free_resources(archive_t *arch) ++{ ++ /* ++ * We will release all the resources that were allocated by the xlator. ++ * Check whether there are any buffers which have not been released ++ * back to a mempool. ++ */ ++ ++ if (arch->handle) { ++ dlclose(arch->handle); ++ } ++ ++ if (arch->iobuf_pool) { ++ iobuf_pool_destroy(arch->iobuf_pool); ++ } ++ ++ if (arch->req_pool) { ++ mem_pool_destroy(arch->req_pool); ++ arch->req_pool = NULL; ++ } ++ ++ return; ++} ++ ++static int32_t ++cvlt_extract_store_fops(xlator_t *this, archive_t *arch) ++{ ++ int32_t op_ret = -1; ++ get_archstore_methods_t get_archstore_methods; ++ ++ /* ++ * libopenarchive.so defines methods for performing data management ++ * operations. 
We will extract the methods from library and these ++ * methods will be invoked for moving data between glusterfs volume ++ * and the data management product. ++ */ ++ ++ VALIDATE_OR_GOTO(arch, err); ++ ++ arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW); ++ if (!arch->handle) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED, ++ " failed to open %s ", LIBARCHIVE_SO); ++ return op_ret; ++ } ++ ++ dlerror(); /* Clear any existing error */ ++ ++ get_archstore_methods = dlsym(arch->handle, "get_archstore_methods"); ++ if (!get_archstore_methods) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " Error extracting get_archstore_methods()"); ++ dlclose(arch->handle); ++ arch->handle = NULL; ++ return op_ret; ++ } ++ ++ op_ret = get_archstore_methods(&(arch->fops)); ++ if (op_ret) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " Failed to extract methods in get_archstore_methods"); ++ dlclose(arch->handle); ++ arch->handle = NULL; ++ return op_ret; ++ } ++ ++err: ++ return op_ret; ++} ++ ++static int32_t ++cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt) ++{ ++ /* ++ * Initialize information about all the memory pools that will be ++ * used by this xlator. ++ */ ++ arch->nreqs = 0; ++ ++ arch->req_pool = NULL; ++ ++ arch->handle = NULL; ++ arch->xl = this; ++ ++ arch->req_pool = mem_pool_new(cvlt_request_t, num_req); ++ if (!arch->req_pool) { ++ goto err; ++ } ++ ++ arch->iobuf_pool = iobuf_pool_new(); ++ if (!arch->iobuf_pool) { ++ goto err; ++ } ++ ++ if (cvlt_extract_store_fops(this, arch)) { ++ goto err; ++ } ++ ++ return 0; ++ ++err: ++ ++ return -1; ++} ++ ++static void ++cvlt_req_init(cvlt_request_t *req) ++{ ++ sem_init(&(req->sem), 0, 0); ++ ++ return; ++} ++ ++static void ++cvlt_req_destroy(cvlt_request_t *req) ++{ ++ if (req->iobuf) { ++ iobuf_unref(req->iobuf); ++ } ++ ++ if (req->iobref) { ++ iobref_unref(req->iobref); ++ } ++ ++ sem_destroy(&(req->sem)); ++ ++ return; ++} ++ ++static cvlt_request_t * ++cvlt_alloc_req(archive_t *arch) ++{ ++ cvlt_request_t *reqptr = NULL; ++ ++ if (!arch) { ++ goto err; ++ } ++ ++ if (arch->req_pool) { ++ reqptr = mem_get0(arch->req_pool); ++ if (reqptr) { ++ cvlt_req_init(reqptr); ++ } ++ } ++ ++ if (reqptr) { ++ LOCK(&(arch->lock)); ++ arch->nreqs++; ++ UNLOCK(&(arch->lock)); ++ } ++ ++err: ++ return reqptr; ++} ++ ++static int32_t ++cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr) ++{ ++ if (!reqptr) { ++ goto err; ++ } ++ ++ if (!arch) { ++ goto err; ++ } ++ ++ if (arch->req_pool) { ++ /* ++ * Free the request resources if they exist. ++ */ ++ ++ cvlt_req_destroy(reqptr); ++ mem_put(reqptr); ++ ++ LOCK(&(arch->lock)); ++ arch->nreqs--; ++ UNLOCK(&(arch->lock)); ++ } ++ ++ return 0; ++ ++err: ++ return -1; ++} ++ ++static int32_t ++cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt) ++{ ++ int32_t ret = -1; ++ int32_t errnum = -1; ++ int32_t locked = 0; ++ ++ /* ++ * Perform all the initializations needed for brining up the xlator. 
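++ *
++ * For reference, the provider half of the contract pulled in by
++ * cvlt_extract_store_fops() above would look roughly like this inside
++ * libopenarchive.so (a sketch only; the store_* callback names are
++ * invented here, since the library's internals are not part of this
++ * patch):
++ *
++ *   int32_t
++ *   get_archstore_methods(archstore_methods_t *m)
++ *   {
++ *       if (!m)
++ *           return -1;
++ *       m->init = store_init;
++ *       m->fini = store_term;
++ *       m->backup = store_backup;
++ *       m->archive = store_archive;
++ *       m->scan = store_scan;
++ *       m->restore = store_restore;
++ *       m->recall = store_recall;
++ *       m->read = store_read;
++ *       return 0;
++ *   }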
++ */ ++ if (!arch) { ++ goto err; ++ } ++ ++ LOCK_INIT(&(arch->lock)); ++ LOCK(&(arch->lock)); ++ ++ locked = 1; ++ ++ ret = cvlt_alloc_resources(this, arch, num_req, num_iatt); ++ ++ if (ret) { ++ goto err; ++ } ++ ++ /* ++ * Now that the fops have been extracted initialize the store ++ */ ++ ret = arch->fops.init(&(arch->descinfo), &errnum, plugin); ++ if (ret) { ++ goto err; ++ } ++ ++ UNLOCK(&(arch->lock)); ++ locked = 0; ++ ret = 0; ++ ++ return ret; ++ ++err: ++ cvlt_free_resources(arch); ++ ++ if (locked) { ++ UNLOCK(&(arch->lock)); ++ } ++ ++ return ret; ++} ++ ++static int32_t ++cvlt_term_xlator(archive_t *arch) ++{ ++ int32_t errnum = -1; ++ ++ if (!arch) { ++ goto err; ++ } ++ ++ LOCK(&(arch->lock)); ++ ++ /* ++ * Release the resources that have been allocated inside store ++ */ ++ arch->fops.fini(&(arch->descinfo), &errnum); ++ ++ cvlt_free_resources(arch); ++ ++ UNLOCK(&(arch->lock)); ++ ++ GF_FREE(arch); ++ ++ return 0; ++ ++err: ++ return -1; ++} ++ ++static int32_t ++cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info) ++{ ++ if (!store_info) { ++ return -1; ++ } ++ ++ store_info->prod = priv->product_id; ++ store_info->prodlen = strlen(priv->product_id); ++ ++ store_info->id = priv->store_id; ++ store_info->idlen = strlen(priv->store_id); ++ ++ return 0; ++} ++ ++static int32_t ++cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info) ++{ ++ if (!xattr || !file_info) { ++ return -1; ++ } ++ ++ gf_uuid_copy(file_info->uuid, xattr->uuid); ++ file_info->path = xattr->file_path; ++ file_info->pathlength = strlen(xattr->file_path); ++ ++ return 0; ++} ++ ++static int32_t ++cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr, ++ archstore_info_t *store_info) ++{ ++ static char *product = "glusterfs"; ++ ++ if (!xattr || !store_info) { ++ return -1; ++ } ++ ++ store_info->prod = product; ++ store_info->prodlen = strlen(product); ++ ++ store_info->id = xattr->volname; ++ store_info->idlen = strlen(xattr->volname); ++ ++ return 0; ++} ++ ++static int32_t ++cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr, ++ archstore_fileinfo_t *file_info) ++{ ++ if (!xattr || !file_info) { ++ return -1; ++ } ++ ++ gf_uuid_copy(file_info->uuid, xattr->gfid); ++ file_info->path = xattr->file_path; ++ file_info->pathlength = strlen(xattr->file_path); ++ ++ return 0; ++} ++ ++static void ++cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs) ++{ ++ /* ++ * If the file was archived then the reported size will not be a ++ * correct one. We need to fix this. 
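++ *
++ * Concretely: once a file has been archived, the brick holds only a
++ * stub, so the iatt reflects the stub's size. The logical values are
++ * carried alongside the request in a cs_size_xattr_t, and the three
++ * assignments below copy them back so that readers see the real file
++ * size, block size and block count instead of the stub's.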
++ */ ++ if (buf && xattrs) { ++ buf->ia_size = xattrs->size; ++ buf->ia_blksize = xattrs->blksize; ++ buf->ia_blocks = xattrs->blocks; ++ } ++ ++ return; ++} ++ ++static void ++cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo, ++ void *cookie, int64_t op_ret, int32_t op_errno) ++{ ++ struct iovec iov; ++ xlator_t *this = NULL; ++ struct iatt postbuf = { ++ 0, ++ }; ++ call_frame_t *frame = NULL; ++ cvlt_request_t *req = (cvlt_request_t *)cookie; ++ cs_local_t *local = NULL; ++ cs_private_t *cspriv = NULL; ++ archive_t *priv = NULL; ++ ++ frame = req->frame; ++ this = frame->this; ++ local = frame->local; ++ ++ cspriv = this->private; ++ priv = (archive_t *)cspriv->stores->config; ++ ++ if (strcmp(priv->trailer, CVLT_TRAILER)) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ gf_msg_debug(plugin, 0, ++ " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64 ++ " op : %d ret : %" PRId64 " errno : %d", ++ req->offset, req->bytes, req->op_type, op_ret, op_errno); ++ ++ if (op_ret < 0) { ++ goto out; ++ } ++ ++ req->iobref = iobref_new(); ++ if (!req->iobref) { ++ op_ret = -1; ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ iobref_add(req->iobref, req->iobuf); ++ iov.iov_base = iobuf_ptr(req->iobuf); ++ iov.iov_len = op_ret; ++ ++ cvlt_copy_stat_info(&postbuf, &(req->szxattr)); ++ ++ /* ++ * Hack to notify higher layers of EOF. ++ */ ++ if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) { ++ gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s", ++ uuid_utoa(req->file_info.uuid)); ++ op_errno = ENOENT; ++ } ++ ++out: ++ ++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf, ++ req->iobref, local->xattr_rsp); ++ ++ if (req) { ++ cvlt_free_req(priv, req); ++ } ++ ++ return; ++} ++ ++static void ++cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info, ++ void *cookie, int64_t ret, int errcode) ++{ ++ cvlt_request_t *req = (cvlt_request_t *)cookie; ++ ++ gf_msg_debug(plugin, 0, ++ " Download callback invoked ret : %" PRId64 " errno : %d", ++ ret, errcode); ++ ++ req->op_ret = ret; ++ req->op_errno = errcode; ++ sem_post(&(req->sem)); ++ ++ return; ++} ++ ++void * ++cvlt_init(xlator_t *this) ++{ ++ int ret = 0; ++ archive_t *priv = NULL; ++ ++ if (!this->children || this->children->next) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, ++ "should have exactly one child"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (!this->parents) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, ++ "dangling volume. 
check volfile"); ++ ret = -1; ++ goto out; ++ } ++ ++ priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t); ++ if (!priv) { ++ ret = -1; ++ goto out; ++ } ++ ++ priv->trailer = CVLT_TRAILER; ++ if (cvlt_init_xlator(this, priv, num_req, num_iatt)) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed"); ++ ret = -1; ++ goto out; ++ } ++ ++ GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out); ++ GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out); ++ ++ gf_msg(plugin, GF_LOG_INFO, 0, 0, ++ "store id is : %s " ++ "product id is : %s.", ++ priv->store_id, priv->product_id); ++out: ++ if (ret == -1) { ++ cvlt_term_xlator(priv); ++ return (NULL); ++ } ++ return priv; ++} ++ ++int ++cvlt_reconfigure(xlator_t *this, dict_t *options) ++{ ++ cs_private_t *cspriv = NULL; ++ archive_t *priv = NULL; ++ ++ cspriv = this->private; ++ priv = (archive_t *)cspriv->stores->config; ++ ++ if (strcmp(priv->trailer, CVLT_TRAILER)) ++ goto out; ++ ++ GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out); ++ ++ GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str, ++ out); ++ gf_msg_debug(plugin, 0, ++ "store id is : %s " ++ "product id is : %s.", ++ priv->store_id, priv->product_id); ++ return 0; ++out: ++ return -1; ++} ++ ++void ++cvlt_fini(void *config) ++{ ++ archive_t *priv = NULL; ++ ++ priv = (archive_t *)config; ++ ++ if (strcmp(priv->trailer, CVLT_TRAILER)) ++ return; ++ ++ cvlt_term_xlator(priv); ++ gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources"); ++ return; ++} ++ ++int ++cvlt_download(call_frame_t *frame, void *config) ++{ ++ archive_t *parch = NULL; ++ cs_local_t *local = frame->local; ++ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; ++ cvlt_request_t *req = NULL; ++ archstore_info_t dest_storeinfo; ++ archstore_fileinfo_t dest_fileinfo; ++ int32_t op_ret, op_errno; ++ ++ parch = (archive_t *)config; ++ ++ if (strcmp(parch->trailer, CVLT_TRAILER)) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ gf_msg_debug(plugin, 0, " download invoked for uuid = %s gfid=%s ", ++ locxattr->uuid, uuid_utoa(locxattr->gfid)); ++ ++ if (!(parch->fops.restore)) { ++ op_errno = ELIBBAD; ++ goto err; ++ } ++ ++ /* ++ * Download needs to be processed. Allocate a request. ++ */ ++ req = cvlt_alloc_req(parch); ++ ++ if (!req) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED, ++ " failed to allocated request for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ /* ++ * Initialize the request object. ++ */ ++ req->op_type = CVLT_RESTORE_OP; ++ req->frame = frame; ++ ++ /* ++ * The file is currently residing inside a data management store. ++ * To restore the file contents we need to provide the information ++ * about data management store. ++ */ ++ op_ret = cvlt_init_store_info(parch, &(req->store_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract store info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract file info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ /* ++ * We need t perform in-place restore of the file from data managment ++ * store to gusterfs volume. 
++ */ ++ op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract destination store info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract file info for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ /* ++ * Submit the restore request. ++ */ ++ op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info), ++ &(req->file_info), &dest_storeinfo, ++ &dest_fileinfo, &op_errno, ++ cvlt_download_complete, req); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, ++ " failed to restore file gfid=%s from data managment store", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ /* ++ * Wait for the restore to complete. ++ */ ++ sem_wait(&(req->sem)); ++ ++ if (req->op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED, ++ " restored failed for gfid=%s", uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ if (req) { ++ cvlt_free_req(parch, req); ++ } ++ ++ return 0; ++ ++err: ++ ++ if (req) { ++ cvlt_free_req(parch, req); ++ } ++ ++ return -1; ++} ++ ++int ++cvlt_read(call_frame_t *frame, void *config) ++{ ++ int32_t op_ret = -1; ++ int32_t op_errno = 0; ++ archive_t *parch = NULL; ++ cvlt_request_t *req = NULL; ++ struct iovec iov = { ++ 0, ++ }; ++ struct iobref *iobref; ++ size_t size = 0; ++ off_t off = 0; ++ ++ cs_local_t *local = frame->local; ++ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr; ++ ++ size = local->xattrinfo.size; ++ off = local->xattrinfo.offset; ++ ++ parch = (archive_t *)config; ++ ++ if (strcmp(parch->trailer, CVLT_TRAILER)) { ++ op_ret = -1; ++ op_errno = EINVAL; ++ goto err; ++ } ++ ++ gf_msg_debug(plugin, 0, ++ " read invoked for gfid = %s offset = %" PRIu64 ++ " file_size = %" PRIu64, ++ uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size); ++ ++ if (off >= local->stbuf.ia_size) { ++ /* ++ * Hack to notify higher layers of EOF. ++ */ ++ ++ op_errno = ENOENT; ++ op_ret = 0; ++ ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, ++ " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid)); ++ ++ goto err; ++ } ++ ++ if (!size) { ++ op_errno = EINVAL; ++ ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED, ++ " zero size read attempted on gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ goto err; ++ } ++ ++ if (!(parch->fops.read)) { ++ op_errno = ELIBBAD; ++ goto err; ++ } ++ ++ /* ++ * The read request need to be processed. Allocate a request. ++ */ ++ req = cvlt_alloc_req(parch); ++ ++ if (!req) { ++ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY, ++ " failed to allocated request for gfid=%s", ++ uuid_utoa(locxattr->gfid)); ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE); ++ if (!req->iobuf) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ /* ++ * Initialize the request object. ++ */ ++ req->op_type = CVLT_READ_OP; ++ req->offset = off; ++ req->bytes = size; ++ req->frame = frame; ++ req->szxattr.size = local->stbuf.ia_size; ++ req->szxattr.blocks = local->stbuf.ia_blocks; ++ req->szxattr.blksize = local->stbuf.ia_blksize; ++ ++ /* ++ * The file is currently residing inside a data management store. ++ * To read the file contents we need to provide the information ++ * about data management store. 
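++ *
++ * Unlike the download path, there is no semaphore wait here: the
++ * request carries the frame, and cvlt_readv_complete() unwinds the
++ * readv straight from the store library's callback context once the
++ * data has landed in the page-aligned iobuf allocated above.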
++ */ ++ op_ret = cvlt_init_store_info(parch, &(req->store_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract store info for gfid=%s" ++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET ++ ", " ++ " buf=%p", ++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); ++ goto err; ++ } ++ ++ op_ret = cvlt_init_file_info(locxattr, &(req->file_info)); ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " failed to extract file info for gfid=%s" ++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET ++ ", " ++ " buf=%p", ++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); ++ goto err; ++ } ++ ++ /* ++ * Submit the read request. ++ */ ++ op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info), ++ &(req->file_info), off, req->iobuf->ptr, size, ++ &op_errno, cvlt_readv_complete, req); ++ ++ if (op_ret < 0) { ++ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED, ++ " read failed on gfid=%s" ++ " offset=%" PRIu64 " size=%" GF_PRI_SIZET ++ ", " ++ " buf=%p", ++ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr); ++ goto err; ++ } ++ ++ return 0; ++ ++err: ++ ++ iobref = iobref_new(); ++ gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d", ++ op_ret, op_errno); ++ ++ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, ++ &(local->stbuf), iobref, local->xattr_rsp); ++ ++ if (iobref) { ++ iobref_unref(iobref); ++ } ++ ++ if (req) { ++ cvlt_free_req(parch, req); ++ } ++ ++ return 0; ++} +diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h +new file mode 100644 +index 0000000..c45ac94 +--- /dev/null ++++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h +@@ -0,0 +1,84 @@ ++/* ++ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++#ifndef _LIBCVLT_H ++#define _LIBCVLT_H ++ ++#include <semaphore.h> ++#include <glusterfs/xlator.h> ++#include <glusterfs/glusterfs.h> ++#include <glusterfs/call-stub.h> ++#include <glusterfs/syncop.h> ++#include <glusterfs/compat-errno.h> ++#include "cloudsync-common.h" ++#include "libcvlt-mem-types.h" ++#include "archivestore.h" ++ ++enum _cvlt_op { ++ CVLT_READ_OP = 1, ++ CVLT_WRITE_OP = 2, ++ CVLT_RESTORE_OP = 3, ++ CVLT_ARCHIVE_OP = 4, ++ CVLT_LOOKUP_OP = 5, ++ CVLT_XATTR_OP = 6, ++ CVLT_STAT_OP = 7, ++ CVLT_FSTAT_op = 8, ++ CVLT_UNDEF_OP = 127 ++}; ++typedef enum _cvlt_op cvlt_op_t; ++ ++struct _archive; ++struct _cvlt_request { ++ uint64_t offset; ++ uint64_t bytes; ++ struct iobuf *iobuf; ++ struct iobref *iobref; ++ call_frame_t *frame; ++ cvlt_op_t op_type; ++ int32_t op_ret; ++ int32_t op_errno; ++ xlator_t *this; ++ sem_t sem; ++ archstore_info_t store_info; ++ archstore_fileinfo_t file_info; ++ cs_size_xattr_t szxattr; ++}; ++typedef struct _cvlt_request cvlt_request_t; ++ ++struct _archive { ++ gf_lock_t lock; /* lock for controlling access */ ++ xlator_t *xl; /* xlator */ ++ void *handle; /* handle returned from dlopen */ ++ int32_t nreqs; /* num requests active */ ++ struct mem_pool *req_pool; /* pool for requests */ ++ struct iobuf_pool *iobuf_pool; /* iobuff pool */ ++ archstore_desc_t descinfo; /* Archive store descriptor info */ ++ archstore_methods_t fops; /* function pointers */ ++ char *product_id; ++ char *store_id; ++ char *trailer; ++}; ++typedef struct _archive archive_t; ++ ++void * ++cvlt_init(xlator_t *); ++ ++int ++cvlt_reconfigure(xlator_t *, dict_t *); ++ ++void ++cvlt_fini(void *); ++ ++int ++cvlt_download(call_frame_t *, void *); ++ ++int ++cvlt_read(call_frame_t *, void *); ++ ++#endif +diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c +index 2240fc3..8026b05 100644 +--- a/xlators/features/cloudsync/src/cloudsync.c ++++ b/xlators/features/cloudsync/src/cloudsync.c +@@ -39,7 +39,11 @@ struct cs_plugin plugins[] = { + {.name = "cloudsyncs3", + .library = "cloudsyncs3.so", + .description = "cloudsync s3 store."}, +- ++#if defined(__linux__) ++ {.name = "cvlt", ++ .library = "cloudsynccvlt.so", ++ .description = "Commvault content store."}, ++#endif + {.name = NULL}, + }; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 73abf37..7a83124 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3724,6 +3724,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "features.cloudsync-remote-read", + .voltype = "features/cloudsync", + .value = "off", +- .op_version = GD_OP_VERSION_6_0, ++ .op_version = GD_OP_VERSION_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, ++ {.key = "features.cloudsync-store-id", ++ .voltype = "features/cloudsync", ++ .op_version = GD_OP_VERSION_7_0, ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, ++ {.key = "features.cloudsync-product-id", ++ .voltype = "features/cloudsync", ++ .op_version = GD_OP_VERSION_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = NULL}}; +-- +1.8.3.1 + diff --git a/SOURCES/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch b/SOURCES/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch new file mode 100644 index 0000000..937a772 --- /dev/null +++ b/SOURCES/0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch @@ -0,0 +1,114 @@ +From 693fcf327eace37fe698953b90050d67fc840ac6 Mon Sep 17 00:00:00 
2001 +From: Anuradha Talur <atalur@commvault.com> +Date: Wed, 24 Apr 2019 12:06:23 -0700 +Subject: [PATCH 155/169] cloudsync: Make readdirp return stat info of all the + dirents + +This change got missed while the initial changes were sent. +Should have been a part of : + https://review.gluster.org/#/c/glusterfs/+/21757/ + +Gist of the change: + Function that fills in stat info for dirents is +invoked in readdirp in posix when cloudsync populates xdata +request with GF_CS_OBJECT_STATUS. + +backport of:https://review.gluster.org/#/c/glusterfs/+/22616/ + +> Change-Id: Ide0c4e80afb74cd2120f74ba934ed40123152d69 +> updates: bz#1642168 +> Signed-off-by: Anuradha Talur <atalur@commvault.com> + +Change-Id: I77de3f9d8ae01a0280a9d1753f94d74b5e5ce2fd +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172193 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/features/cloudsync/src/cloudsync-fops-c.py | 2 +- + xlators/features/cloudsync/src/cloudsync.c | 35 ++++++++++++++++++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 2 ++ + 3 files changed, 38 insertions(+), 1 deletion(-) + +diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py +index a7a2201..8878b70 100755 +--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py ++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py +@@ -285,7 +285,7 @@ loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr', + + # These fops need a separate implementation + special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr', +- 'truncate', 'fstat', 'readv'] ++ 'truncate', 'fstat', 'readv', 'readdirp'] + + def gen_defaults(): + for name in ops: +diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c +index 8026b05..26e512c 100644 +--- a/xlators/features/cloudsync/src/cloudsync.c ++++ b/xlators/features/cloudsync/src/cloudsync.c +@@ -280,6 +280,40 @@ out: + } + + int32_t ++cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ++ off_t off, dict_t *xdata) ++{ ++ int ret = 0; ++ int op_errno = ENOMEM; ++ ++ if (!xdata) { ++ xdata = dict_new(); ++ if (!xdata) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, ++ "failed to create " ++ "dict"); ++ goto err; ++ } ++ } ++ ++ ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, 0, ++ "dict_set failed key:" ++ " %s", ++ GF_CS_OBJECT_STATUS); ++ goto err; ++ } ++ ++ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); ++ return 0; ++err: ++ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL); ++ return 0; ++} ++ ++int32_t + cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +@@ -2026,6 +2060,7 @@ cs_notify(xlator_t *this, int event, void *data, ...) 
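+ /* Wiring cs_readdirp into the fop table below is what makes the new
+ * xdata-driven readdirp path (added in the hunk above) reachable;
+ * without this entry the default readdirp would be used and the
+ * GF_CS_OBJECT_STATUS request would never be set. */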
+
+ struct xlator_fops cs_fops = {
+ .stat = cs_stat,
++ .readdirp = cs_readdirp,
+ .truncate = cs_truncate,
+ .seek = cs_seek,
+ .statfs = cs_statfs,
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index 065fced..2c19ce1 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -5472,6 +5472,8 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ continue;
+ }
+
++ posix_update_iatt_buf(&stbuf, -1, hpath, dict);
++
+ if (!inode)
+ inode = inode_find(itable, stbuf.ia_gfid);
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch b/SOURCES/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
new file mode 100644
index 0000000..1a73388
--- /dev/null
+++ b/SOURCES/0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch
@@ -0,0 +1,94 @@
+From d8c98e9785e652692d928a2efbbb571703f728b0 Mon Sep 17 00:00:00 2001
+From: Anuradha Talur <atalur@commvault.com>
+Date: Wed, 24 Apr 2019 12:35:08 -0700
+Subject: [PATCH 156/169] cloudsync: Fix bug in cloudsync-fops-c.py
+
+In some of the fops generated by generator.py, xdata request
+was not being wound to the child xlator correctly.
+
+This was happening because, even though the logic in
+cloudsync-fops-c.py was correct, generator.py was generating
+resultant code that omitted this logic.
+
+Made changes in cloudsync-fops-c.py so that correct code is
+produced.
+
+backport of: https://review.gluster.org/#/c/glusterfs/+/22617/
+
+> Change-Id: I6f25bdb36ede06fd03be32c04087a75639d79150
+> updates: bz#1642168
+> Signed-off-by: Anuradha Talur <atalur@commvault.com>
+
+Change-Id: I87cc71e98c2c6cec78a6e84850fc8d82f8dd4dfd
+Signed-off-by: Susant Palai <spalai@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172195
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/features/cloudsync/src/cloudsync-fops-c.py | 24 +++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+index 8878b70..c444ea6 100755
+--- a/xlators/features/cloudsync/src/cloudsync-fops-c.py
++++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
+@@ -39,7 +39,15 @@ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ else
+ state = GF_CS_LOCAL;
+
+- local->xattr_req = xdata ? dict_ref (xdata) : (xdata = dict_new ());
++ xdata = xdata ? dict_ref (xdata) : dict_new ();
++
++ if (!xdata) {
++ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+@@ -187,19 +195,29 @@ int32_t
+ cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+ {
++ int op_errno = EINVAL;
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@);
+ if (!local) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL");
++ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (loc->inode->ia_type == IA_IFDIR)
+ goto wind;
+
+- local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
++
++ if (!xdata) {
++ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
++ op_errno = ENOMEM;
++ goto err;
++ }
++
++ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+@@ -215,7 +233,7 @@ wind:
+
+ return 0;
+ err:
+- CS_STACK_UNWIND (@NAME@, frame, -1, errno, @CBK_ERROR_ARGS@);
++ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
+
+ return 0;
+ }
+--
+1.8.3.1
+
diff --git a/SOURCES/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch b/SOURCES/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
new file mode 100644
index 0000000..185a24a
--- /dev/null
+++ b/SOURCES/0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch
@@ -0,0 +1,68 @@
+From 4a72ac20f728aa5c3141359ff89f1b61d4cd210a Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Fri, 17 May 2019 23:03:35 +0530
+Subject: [PATCH 157/169] afr/frame: Destroy frame after
+ afr_selfheal_entry_granular
+
+In function "afr_selfheal_entry_granular", after completing the
+heal we are not destroying the frame. This will lead to a crash
+when we execute a statedump operation, which tries to access the
+xlator object. If that xlator object has been freed as part of a
+graph destroy, this leads to an invalid memory access.
+
+Upstream patch: https://review.gluster.org/22743
+
+>Change-Id: I0a5e78e704ef257c3ac0087eab2c310e78fbe36d
+>fixes: bz#1708926
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I326354008e6d98376c8333d270f2f80036ad07f0
+BUG: 1716626
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/172282
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/cluster/afr/src/afr-self-heal-entry.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index fc09b4c..a6890fa 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -832,6 +832,8 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame(frame);
++ if (!args.frame)
++ goto out;
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
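+ *
+ * (The frame copied above is the object this patch is about: every
+ * exit path now funnels through the new "out" label further down,
+ * where AFR_STACK_DESTROY() releases args.frame. Previously the early
+ * returns leaked it, and a later statedump walking the leaked frame
+ * could touch an xlator already freed by graph cleanup.)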
+ */ + if (is_src) +- return -errno; ++ ret = -errno; + else +- return 0; ++ ret = 0; ++ goto out; + } + + ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args, +@@ -862,7 +865,9 @@ afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd, + + if (args.mismatch == _gf_true) + ret = -1; +- ++out: ++ if (args.frame) ++ AFR_STACK_DESTROY(args.frame); + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch b/SOURCES/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch new file mode 100644 index 0000000..d12e81d --- /dev/null +++ b/SOURCES/0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch @@ -0,0 +1,162 @@ +From 11b64d494c52004002f900888694d20ef8af6df6 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Sat, 11 May 2019 22:40:22 +0530 +Subject: [PATCH 158/169] glusterfsd/cleanup: Protect graph object under a lock + +While processing a cleanup_and_exit function, we are +accessing a graph object. But this has not been protected +under a lock. Because a parallel cleanup of a graph is quite +possible which might lead to an invalid memory access + +Upstream patch:https://review.gluster.org/#/c/glusterfs/+/22709/ + +>Change-Id: Id05ca70d5b57e172b0401d07b6a1f5386c044e79 +>fixes: bz#1708926 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I55ab0525c79baa99a3bd929ee979c5519be5ab21 +BUG: 1716626 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172283 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + libglusterfs/src/graph.c | 58 +++++++++++++++---------- + libglusterfs/src/statedump.c | 16 +++++-- + tests/bugs/glusterd/optimized-basic-testcases.t | 4 +- + 3 files changed, 50 insertions(+), 28 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 4c8b02d..18fb2d9 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1392,8 +1392,12 @@ glusterfs_graph_cleanup(void *arg) + } + pthread_mutex_unlock(&ctx->notify_lock); + +- glusterfs_graph_fini(graph); +- glusterfs_graph_destroy(graph); ++ pthread_mutex_lock(&ctx->cleanup_lock); ++ { ++ glusterfs_graph_fini(graph); ++ glusterfs_graph_destroy(graph); ++ } ++ pthread_mutex_unlock(&ctx->cleanup_lock); + out: + return NULL; + } +@@ -1468,31 +1472,37 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + + if (!ctx || !ctx->active || !volfile_obj) + goto out; +- parent_graph = ctx->active; +- graph = volfile_obj->graph; +- if (!graph) +- goto out; +- if (graph->first) +- xl = graph->first; + +- last_xl = graph->last_xl; +- if (last_xl) +- last_xl->next = NULL; +- if (!xl || xl->cleanup_starting) +- goto out; ++ pthread_mutex_lock(&ctx->cleanup_lock); ++ { ++ parent_graph = ctx->active; ++ graph = volfile_obj->graph; ++ if (!graph) ++ goto unlock; ++ if (graph->first) ++ xl = graph->first; ++ ++ last_xl = graph->last_xl; ++ if (last_xl) ++ last_xl->next = NULL; ++ if (!xl || xl->cleanup_starting) ++ goto unlock; + +- xl->cleanup_starting = 1; +- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, +- "detaching child %s", volfile_obj->vol_id); ++ xl->cleanup_starting = 1; ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, ++ "detaching child %s", volfile_obj->vol_id); + +- list_del_init(&volfile_obj->volfile_list); +- glusterfs_mux_xlator_unlink(parent_graph->top, xl); +- 
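+
+In other words, whether a node set the txn-opinfo depended on its own
+code level rather than on any cluster-wide agreement. Expressed as the
+condition the fix below installs: always set the opinfo while
+priv->op_version < GD_OP_VERSION_6_0, and apply the newer skip
+conditions only once the whole cluster runs op-version 6.0, so that
+all peers make the same decision.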
parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); +- parent_graph->xl_count -= graph->xl_count; +- parent_graph->leaf_count -= graph->leaf_count; +- default_notify(xl, GF_EVENT_PARENT_DOWN, xl); +- parent_graph->id++; +- ret = 0; ++ list_del_init(&volfile_obj->volfile_list); ++ glusterfs_mux_xlator_unlink(parent_graph->top, xl); ++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); ++ parent_graph->xl_count -= graph->xl_count; ++ parent_graph->leaf_count -= graph->leaf_count; ++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl); ++ parent_graph->id++; ++ ret = 0; ++ } ++unlock: ++ pthread_mutex_unlock(&ctx->cleanup_lock); + out: + if (!ret) { + list_del_init(&volfile_obj->volfile_list); +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index 0cf80c0..0d58f8f 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -805,11 +805,17 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + int brick_count = 0; + int len = 0; + +- gf_proc_dump_lock(); +- + if (!ctx) + goto out; + ++ /* ++ * Multiplexed daemons can change the active graph when attach/detach ++ * is called. So this has to be protected with the cleanup lock. ++ */ ++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) ++ pthread_mutex_lock(&ctx->cleanup_lock); ++ gf_proc_dump_lock(); ++ + if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) && + (ctx && ctx->active)) { + top = ctx->active->first; +@@ -923,7 +929,11 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + out: + GF_FREE(dump_options.dump_path); + dump_options.dump_path = NULL; +- gf_proc_dump_unlock(); ++ if (ctx) { ++ gf_proc_dump_unlock(); ++ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) ++ pthread_mutex_unlock(&ctx->cleanup_lock); ++ } + + return; + } +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index d700b5e..110f1b9 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -289,7 +289,9 @@ mkdir -p /xyz/var/lib/glusterd/abc + TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc + EXPECT 'Created' volinfo_field "test" 'Status'; + +-EXPECT "1" generate_statedump_and_check_for_glusterd_info ++#While taking a statedump, there is a TRY_LOCK on call_frame, which might may cause ++#failure. So Adding a EXPECT_WITHIN ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info + + cleanup_statedump `pidof glusterd` + cleanup +-- +1.8.3.1 + diff --git a/SOURCES/0159-glusterd-add-an-op-version-check.patch b/SOURCES/0159-glusterd-add-an-op-version-check.patch new file mode 100644 index 0000000..323ae95 --- /dev/null +++ b/SOURCES/0159-glusterd-add-an-op-version-check.patch @@ -0,0 +1,66 @@ +From bd087c3d2766b81b25ea7bbe425b55023fd12545 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Wed, 15 May 2019 07:35:45 +0530 +Subject: [PATCH 159/169] glusterd: add an op-version check + +Problem: "gluster v status" is hung in heterogenous cluster +when issued from a non-upgraded node. + +Cause: commit 34e010d64 fixes the txn-opinfo mem leak +in op-sm framework by not setting the txn-opinfo if some +conditions are true. When vol status is issued from a +non-upgraded node, command is hanging in its upgraded peer +as the upgraded node setting the txn-opinfo based on new +conditions where as non-upgraded nodes are following diff +conditions. 
+ +Fix: Add an op-version check, so that all the nodes follow +same set of conditions to set txn-opinfo. + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22730/ + +BUG: 1707246 + +> fixes: bz#1710159 +> Change-Id: Ie1f353212c5931ddd1b728d2e6949dfe6225c4ab +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +Change-Id: Ie1f353212c5931ddd1b728d2e6949dfe6225c4ab +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172307 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 94a5e1f..d0c1a2c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -8158,9 +8158,12 @@ glusterd_op_sm() + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; ++ glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); ++ priv = this->private; ++ GF_ASSERT(priv); + + ret = synclock_trylock(&gd_op_sm_lock); + if (ret) { +@@ -8238,7 +8241,8 @@ glusterd_op_sm() + "Unable to clear " + "transaction's opinfo"); + } else { +- if (!(event_type == GD_OP_EVENT_STAGE_OP && ++ if ((priv->op_version < GD_OP_VERSION_6_0) || ++ !(event_type == GD_OP_EVENT_STAGE_OP && + opinfo.state.state == GD_OP_STATE_STAGED && + opinfo.skip_locking)) { + ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo); +-- +1.8.3.1 + diff --git a/SOURCES/0160-geo-rep-Geo-rep-help-text-issue.patch b/SOURCES/0160-geo-rep-Geo-rep-help-text-issue.patch new file mode 100644 index 0000000..efba5a4 --- /dev/null +++ b/SOURCES/0160-geo-rep-Geo-rep-help-text-issue.patch @@ -0,0 +1,41 @@ +From 77df6b8930fd4acf3d0c38220fa4317ee97d530f Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Thu, 9 May 2019 10:43:01 +0530 +Subject: [PATCH 160/169] geo-rep: Geo-rep help text issue + +Modified Geo-rep help text for better sanity. 
+ +>fixes: bz#1652887 +>Change-Id: I40ef7ef709eaecf0125ab4b4a7517e2c5d1ef4a0 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22689/ + +BUG: 1480907 +Change-Id: I40ef7ef709eaecf0125ab4b4a7517e2c5d1ef4a0 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172316 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-volume.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 3432dbe..564aef7 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3422,8 +3422,8 @@ struct cli_cmd volume_cmds[] = { + "reset all the reconfigured options"}, + + #if (SYNCDAEMON_COMPILE) +- {"volume " GEOREP " [<VOLNAME>] [<SLAVE-URL>] {\\\n create [[ssh-port n] " +- "[[no-verify] | [push-pem]]] [force] \\\n" ++ {"volume " GEOREP " [<MASTER-VOLNAME>] [<SLAVE-IP>]::[<SLAVE-VOLNAME>] {" ++ "\\\n create [[ssh-port n] [[no-verify] \\\n | [push-pem]]] [force] \\\n" + " | start [force] \\\n | stop [force] \\\n | pause [force] \\\n | resume " + "[force] \\\n" + " | config [[[\\!]<option>] [<value>]] \\\n | status " +-- +1.8.3.1 + diff --git a/SOURCES/0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch b/SOURCES/0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch new file mode 100644 index 0000000..4522ec4 --- /dev/null +++ b/SOURCES/0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch @@ -0,0 +1,289 @@ +From 69ac1fd2da7a57f2f0854412863911959bf71fde Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Tue, 2 Apr 2019 12:38:09 +0530 +Subject: [PATCH 161/169] geo-rep: Fix rename with existing destination with + same gfid + +Problem: + Geo-rep fails to sync the rename properly if destination exists. +It results in source to be remained on slave causing more number of +files on slave. Also heavy rename workload like logrotate caused +lot of ESTALE errors + +Cause: + Geo-rep fails to sync rename if destination exists if creation +of source file also falls into single batch of changelogs being +processed. This is because, after fixing problematic gfids verifying +from master, while re-processing original entries, CREATE also was +re-processed causing more files on slave and rename to be failed. + +Solution: + Entries need to be removed from retrial list after fixing +problematic gfids on slave so that it's not re-created again on slave. + Also treat ESTALE as EEXIST so that the error is properly handled +verifying the op on master volume. 
+ +Backport of: + > Patch: https://review.gluster.org/22519 + > Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9 + > BUG: 1694820 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + > Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9 +fixes: bz#1708121 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172393 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/syncdaemon/master.py | 41 +++++++++++++++++++++-- + geo-replication/syncdaemon/resource.py | 10 ++++-- + tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t | 5 +++ + tests/00-geo-rep/georep-basic-dr-rsync.t | 5 +++ + tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t | 5 +++ + tests/00-geo-rep/georep-basic-dr-tarssh.t | 5 +++ + tests/geo-rep.rc | 36 ++++++++++++++++++++ + 7 files changed, 102 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 3da7610..42c86d7 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -65,6 +65,9 @@ def _volinfo_hook_relax_foreign(self): + def edct(op, **ed): + dct = {} + dct['op'] = op ++ # This is used in automatic gfid conflict resolution. ++ # When marked True, it's skipped during re-processing. ++ dct['skip_entry'] = False + for k in ed: + if k == 'stat': + st = ed[k] +@@ -792,6 +795,7 @@ class GMasterChangelogMixin(GMasterCommon): + pfx = gauxpfx() + fix_entry_ops = [] + failures1 = [] ++ remove_gfids = set() + for failure in failures: + if failure[2]['name_mismatch']: + pbname = failure[2]['slave_entry'] +@@ -822,6 +826,18 @@ class GMasterChangelogMixin(GMasterCommon): + edct('UNLINK', + gfid=failure[2]['slave_gfid'], + entry=pbname)) ++ remove_gfids.add(slave_gfid) ++ if op in ['RENAME']: ++ # If renamed gfid doesn't exists on master, remove ++ # rename entry and unlink src on slave ++ st = lstat(os.path.join(pfx, failure[0]['gfid'])) ++ if isinstance(st, int) and st == ENOENT: ++ logging.debug("Unlink source %s" % repr(failure)) ++ remove_gfids.add(failure[0]['gfid']) ++ fix_entry_ops.append( ++ edct('UNLINK', ++ gfid=failure[0]['gfid'], ++ entry=failure[0]['entry'])) + # Takes care of scenarios of hardlinks/renames on master + elif not isinstance(st, int): + if matching_disk_gfid(slave_gfid, pbname): +@@ -831,7 +847,12 @@ class GMasterChangelogMixin(GMasterCommon): + ' Safe to ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) +- entries.remove(failure[0]) ++ remove_gfids.add(failure[0]['gfid']) ++ if op == 'RENAME': ++ fix_entry_ops.append( ++ edct('UNLINK', ++ gfid=failure[0]['gfid'], ++ entry=failure[0]['entry'])) + # The file exists on master but with different name. + # Probably renamed and got missed during xsync crawl. 
+ elif failure[2]['slave_isdir']: +@@ -856,7 +877,10 @@ class GMasterChangelogMixin(GMasterCommon): + 'take out entry', + retry_count=retry_count, + entry=repr(failure))) +- entries.remove(failure[0]) ++ try: ++ entries.remove(failure[0]) ++ except ValueError: ++ pass + else: + rename_dict = edct('RENAME', gfid=slave_gfid, + entry=src_entry, +@@ -896,7 +920,10 @@ class GMasterChangelogMixin(GMasterCommon): + 'ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) +- entries.remove(failure[0]) ++ try: ++ entries.remove(failure[0]) ++ except ValueError: ++ pass + else: + logging.info(lf('Fixing ENOENT error in slave. Create ' + 'parent directory on slave.', +@@ -913,6 +940,14 @@ class GMasterChangelogMixin(GMasterCommon): + edct('MKDIR', gfid=pargfid, entry=dir_entry, + mode=st.st_mode, uid=st.st_uid, gid=st.st_gid)) + ++ logging.debug("remove_gfids: %s" % repr(remove_gfids)) ++ if remove_gfids: ++ for e in entries: ++ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \ ++ and e['gfid'] in remove_gfids: ++ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e)) ++ e['skip_entry'] = True ++ + if fix_entry_ops: + # Process deletions of entries whose gfids are mismatched + failures1 = self.slave.server.entry_ops(fix_entry_ops) +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index c290d86..f54ccd9 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -426,7 +426,7 @@ class Server(object): + e['stat']['uid'] = uid + e['stat']['gid'] = gid + +- if cmd_ret == EEXIST: ++ if cmd_ret in [EEXIST, ESTALE]: + if dst: + en = e['entry1'] + else: +@@ -510,6 +510,12 @@ class Server(object): + entry = e['entry'] + uid = 0 + gid = 0 ++ ++ # Skip entry processing if it's marked true during gfid ++ # conflict resolution ++ if e['skip_entry']: ++ continue ++ + if e.get("stat", {}): + # Copy UID/GID value and then reset to zero. Copied UID/GID + # will be used to run chown once entry is created. 
+@@ -688,7 +694,7 @@ class Server(object): + if blob: + cmd_ret = errno_wrap(Xattr.lsetxattr, + [pg, 'glusterfs.gfid.newfile', blob], +- [EEXIST, ENOENT], ++ [EEXIST, ENOENT, ESTALE], + [ESTALE, EINVAL, EBUSY]) + collect_failure(e, cmd_ret, uid, gid) + +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t +index 67ac167..1a55ed2 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t +@@ -203,6 +203,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} + TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file" + TEST "echo sampledata > $master_mnt/rsync_option_test_file" + ++#rename with existing destination case BUG:1694820 ++TEST create_rename_with_existing_destination ${master_mnt} ++#verify rename with existing destination case BUG:1694820 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} ++ + #Verify arequal for whole volume + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} + +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index 8b64370..d0c0fc9 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -204,6 +204,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} + TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file" + TEST "echo sampledata > $master_mnt/rsync_option_test_file" + ++#rename with existing destination case BUG:1694820 ++TEST create_rename_with_existing_destination ${master_mnt} ++#verify rename with existing destination case BUG:1694820 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} ++ + #Verify arequal for whole volume + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} + +diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t +index 1726d0b..cb530ad 100644 +--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t ++++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t +@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/s + #rsnapshot usecase + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} + ++#rename with existing destination case BUG:1694820 ++TEST create_rename_with_existing_destination ${master_mnt} ++#verify rename with existing destination case BUG:1694820 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} ++ + #Verify arequal for whole volume + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} + +diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t +index c5d16ac..9e2f613 100644 +--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t ++++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t +@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt} + #rsnapshot usecase + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} + ++#rename with existing destination case BUG:1694820 ++TEST create_rename_with_existing_destination ${master_mnt} ++#verify rename with existing destination case BUG:1694820 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} ++ + #Verify arequal for whole volume + EXPECT_WITHIN 
$GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} + +diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc +index d723129..e357ba8 100644 +--- a/tests/geo-rep.rc ++++ b/tests/geo-rep.rc +@@ -403,3 +403,39 @@ function check_slave_read_only() + gluster volume info $1 | grep 'features.read-only: on' + echo $? + } ++ ++function create_rename_with_existing_destination() ++{ ++ dir=$1/rename_with_existing_destination ++ mkdir $dir ++ for i in {1..5} ++ do ++ echo "Data_set$i" > $dir/data_set$i ++ mv $dir/data_set$i $dir/data_set -f ++ done ++} ++ ++function verify_rename_with_existing_destination() ++{ ++ dir=$1/rename_with_existing_destination ++ ++ if [ ! -d $dir ]; then ++ echo 1 ++ elif [ ! -f $dir/data_set ]; then ++ echo 2 ++ elif [ -f $dir/data_set1 ]; then ++ echo 3 ++ elif [ -f $dir/data_set2 ]; then ++ echo 4 ++ elif [ -f $dir/data_set3 ]; then ++ echo 5 ++ elif [ -f $dir/data_set4 ]; then ++ echo 6 ++ elif [ -f $dir/data_set5 ]; then ++ echo 7 ++ elif test "XData_set5" != "X$(cat $dir/data_set)"; then ++ echo 8 ++ else ++ echo 0 ++ fi ++} +-- +1.8.3.1 + diff --git a/SOURCES/0162-geo-rep-Fix-sync-method-config.patch b/SOURCES/0162-geo-rep-Fix-sync-method-config.patch new file mode 100644 index 0000000..ed8ab94 --- /dev/null +++ b/SOURCES/0162-geo-rep-Fix-sync-method-config.patch @@ -0,0 +1,210 @@ +From d148248aa3f0dfe7356a13d6fd029f0c6b3746cf Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Wed, 8 May 2019 10:56:31 +0530 +Subject: [PATCH 162/169] geo-rep: Fix sync-method config + +Problem: +When 'use_tarssh' is set to true, it exits with successful +message but the default 'rsync' was used as sync-engine. +The new config 'sync-method' is not allowed to set from cli. + +Analysis and Fix: +The 'use_tarssh' config is deprecated with new +config framework and 'sync-method' is the new +config to choose sync-method i.e. tarssh or rsync. +This patch fixes the 'sync-method' config. The allowed +values are tarssh and rsync. + +Backport of: + > Patch: https://review.gluster.org/22683 + > Change-Id: I0edb0319cad0455b29e49f2f08a64ce324735e84 + > BUG: 1707686 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: I0edb0319cad0455b29e49f2f08a64ce324735e84 +fixes: bz#1708067 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172394 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/gsyncd.conf.in | 9 +++++---- + geo-replication/syncdaemon/resource.py | 7 ++++--- + tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t | 4 ++-- + tests/00-geo-rep/georep-basic-dr-rsync.t | 4 ++-- + tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t | 6 +++--- + tests/00-geo-rep/georep-basic-dr-tarssh.t | 6 +++--- + tests/geo-rep.rc | 3 ++- + 7 files changed, 21 insertions(+), 18 deletions(-) + +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 6160c7c..c2e4f0d 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -128,10 +128,11 @@ value= + value=5 + type=int + +-[use-tarssh] +-value=false +-type=bool +-help=Use sync-mode as tarssh ++[sync-method] ++value=rsync ++help=Sync method for data sync. Available methods are tar over ssh and rsync. Default is rsync. 
++validation=choice ++allowed_values=tarssh,rsync + + [remote-gsyncd] + value = +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index f54ccd9..522279b 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -1512,7 +1512,7 @@ class SSH(object): + + return po + +- def tarssh(self, files, slaveurl, log_err=False): ++ def tarssh(self, files, log_err=False): + """invoke tar+ssh + -z (compress) can be use if needed, but omitting it now + as it results in weird error (tar+ssh errors out (errcode: 2) +@@ -1520,10 +1520,11 @@ class SSH(object): + if not files: + raise GsyncdError("no files to sync") + logging.debug("files: " + ", ".join(files)) +- (host, rdir) = slaveurl.split(':') ++ (host, rdir) = self.slaveurl.split(':') ++ + tar_cmd = ["tar"] + \ + ["--sparse", "-cf", "-", "--files-from", "-"] +- ssh_cmd = gconf.get("ssh-command-tar").split() + \ ++ ssh_cmd = gconf.get("ssh-command").split() + \ + gconf.get("ssh-options-tar").split() + \ + ["-p", str(gconf.get("ssh-port"))] + \ + [host, "tar"] + \ +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t +index 1a55ed2..8b90347 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t +@@ -159,7 +159,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1 + + #logrotate + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate + + #CREATE+RENAME + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file +@@ -209,7 +209,7 @@ TEST create_rename_with_existing_destination ${master_mnt} + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} + + #Verify arequal for whole volume +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index d0c0fc9..428e9ed 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -160,7 +160,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1 + + #logrotate + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate + + #CREATE+RENAME + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file +@@ -210,7 +210,7 @@ TEST create_rename_with_existing_destination ${master_mnt} + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} + + #Verify arequal for whole volume +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop +diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t +index 
cb530ad..8fed929 100644 +--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t ++++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t +@@ -81,7 +81,7 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true + TEST $CLI volume set $GMV0 changelog.rollover-time 3 + + #Config tarssh as sync-engine +-TEST $GEOREP_CLI $master $slave config use_tarssh true ++TEST $GEOREP_CLI $master $slave config sync-method tarssh + + #Wait for common secret pem file to be created + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file +@@ -162,7 +162,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1 + + #logrotate + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate + + #CREATE+RENAME + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file +@@ -208,7 +208,7 @@ TEST create_rename_with_existing_destination ${master_mnt} + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} + + #Verify arequal for whole volume +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop +diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t +index 9e2f613..feb2de7 100644 +--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t ++++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t +@@ -81,7 +81,7 @@ TEST $GEOREP_CLI $master $slave config use_meta_volume true + TEST $CLI volume set $GMV0 changelog.rollover-time 3 + + #Config tarssh as sync-engine +-TEST $GEOREP_CLI $master $slave config use_tarssh true ++TEST $GEOREP_CLI $master $slave config sync-method tarssh + + #Wait for common secret pem file to be created + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file +@@ -162,7 +162,7 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1 + + #logrotate + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate + + #CREATE+RENAME + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file +@@ -208,7 +208,7 @@ TEST create_rename_with_existing_destination ${master_mnt} + EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} + + #Verify arequal for whole volume +-EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop +diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc +index e357ba8..2035b9f 100644 +--- a/tests/geo-rep.rc ++++ b/tests/geo-rep.rc +@@ -168,7 +168,8 @@ function arequal_checksum() + { + master=$1 + slave=$2 +- diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l ++ ret=$(diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l) ++ echo x$ret + } + + function symlink_ok() +-- +1.8.3.1 + diff --git a/SOURCES/0163-geo-rep-Fix-sync-hang-with-tarssh.patch b/SOURCES/0163-geo-rep-Fix-sync-hang-with-tarssh.patch new file mode 
100644 index 0000000..9b90128 --- /dev/null +++ b/SOURCES/0163-geo-rep-Fix-sync-hang-with-tarssh.patch @@ -0,0 +1,255 @@ +From 29ec87484b1ee3ad6417c37726db8aa9296f3a83 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Wed, 8 May 2019 11:26:06 +0530 +Subject: [PATCH 163/169] geo-rep: Fix sync hang with tarssh + +Problem: +Geo-rep sync hangs when tarssh is used as sync +engine at heavy workload. + +Analysis and Root cause: +It's found out that the tar process was hung. +When debugged further, it's found out that stderr +buffer of tar process on master was full i.e., 64k. +When the buffer was copied to a file from /proc/pid/fd/2, +the hang is resolved. + +This can happen when files picked by tar process +to sync doesn't exist on master anymore. If this count +increases around 1k, the stderr buffer is filled up. + +Fix: +The tar process is executed using Popen with stderr as PIPE. +The final execution is something like below. + +tar | ssh <args> root@slave tar --overwrite -xf - -C <path> + +It was waiting on ssh process first using communicate() and then tar. +Note that communicate() reads stdout and stderr. So when stderr of tar +process is filled up, there is no one to read until untar via ssh is +completed. This can't happen and leads to deadlock. +Hence we should be waiting on both process parallely, so that stderr is +read on both processes. + +Backport of: + > Patch: https://review.gluster.org/22684 + > Change-Id: I609c7cc5c07e210c504771115b4d551a2e891adf + > BUG: 1707728 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: I609c7cc5c07e210c504771115b4d551a2e891adf +fixes: bz#1708116 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172395 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/syncdaemon/resource.py | 22 ++++-- + tests/00-geo-rep/georep-stderr-hang.t | 128 +++++++++++++++++++++++++++++++++ + tests/geo-rep.rc | 17 +++++ + 3 files changed, 163 insertions(+), 4 deletions(-) + create mode 100644 tests/00-geo-rep/georep-stderr-hang.t + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index 522279b..b16db60 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -1540,15 +1540,29 @@ class SSH(object): + + p0.stdin.close() + p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits. 
+- # wait for tar to terminate, collecting any errors, further +- # waiting for transfer to complete +- _, stderr1 = p1.communicate() + + # stdin and stdout of p0 is already closed, Reset to None and + # wait for child process to complete + p0.stdin = None + p0.stdout = None +- p0.communicate() ++ ++ def wait_for_tar(p0): ++ _, stderr = p0.communicate() ++ if log_err: ++ for errline in stderr.strip().split("\n")[:-1]: ++ if "No such file or directory" not in errline: ++ logging.error(lf("SYNC Error", ++ sync_engine="Tarssh", ++ error=errline)) ++ ++ t = syncdutils.Thread(target=wait_for_tar, args=(p0, )) ++ # wait for tar to terminate, collecting any errors, further ++ # waiting for transfer to complete ++ t.start() ++ ++ # wait for ssh process ++ _, stderr1 = p1.communicate() ++ t.join() + + if log_err: + for errline in stderr1.strip().split("\n")[:-1]: +diff --git a/tests/00-geo-rep/georep-stderr-hang.t b/tests/00-geo-rep/georep-stderr-hang.t +new file mode 100644 +index 0000000..496f0e6 +--- /dev/null ++++ b/tests/00-geo-rep/georep-stderr-hang.t +@@ -0,0 +1,128 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. $(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=500 ++ ++AREQUAL_PATH=$(dirname $0)/../utils ++test "`uname -s`" != "Linux" && { ++ CFLAGS="$CFLAGS -lintl"; ++} ++build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS ++ ++### Basic Tests with Distribute Replicate volumes ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++ ++##Variables ++GEOREP_CLI="$CLI volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++############################################################ ++#SETUP VOLUMES AND GEO-REPLICATION ++############################################################ ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 $H0:$B0/${GMV0}1; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 $H0:$B0/${GSV0}1; ++TEST $CLI volume start $GSV0 ++TEST $CLI volume set $GSV0 performance.stat-prefetch off ++TEST $CLI volume set $GSV0 performance.quick-read off ++TEST $CLI volume set $GSV0 performance.readdir-ahead off ++TEST $CLI volume set $GSV0 performance.read-ahead off ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++############################################################ ++#BASIC GEO-REPLICATION TESTS ++############################################################ ++ ++TEST create_georep_session $master $slave ++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Created" ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Set changelog roll-over time to 45 secs ++TEST $CLI volume set $GMV0 changelog.rollover-time 45 ++ ++#Wait for common secret pem file to be created ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file ++ ++#Verify the keys are distributed ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed ++ ++#Set sync-jobs to 1 ++TEST $GEOREP_CLI $master $slave config sync-jobs 1 ++ ++#Start_georep ++TEST $GEOREP_CLI $master $slave start ++ ++touch $M0 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active" ++EXPECT_WITHIN 
$GEO_REP_TIMEOUT 1 check_status_num_rows "Changelog Crawl" ++ ++#Check History Crawl. ++TEST $GEOREP_CLI $master $slave stop ++TEST create_data_hang "rsync_hang" ++TEST create_data "history_rsync" ++TEST $GEOREP_CLI $master $slave start ++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active" ++ ++#Verify arequal for whole volume ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Config tarssh as sync-engine ++TEST $GEOREP_CLI $master $slave config sync-method tarssh ++ ++#Create tarssh hang data ++TEST create_data_hang "tarssh_hang" ++TEST create_data "history_tar" ++ ++TEST $GEOREP_CLI $master $slave start ++EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active" ++ ++#Verify arequal for whole volume ++EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave delete ++ ++#Cleanup are-equal binary ++TEST rm $AREQUAL_PATH/arequal-checksum ++ ++#Cleanup authorized keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc +index 2035b9f..e4f014e 100644 +--- a/tests/geo-rep.rc ++++ b/tests/geo-rep.rc +@@ -101,6 +101,23 @@ function create_data() + chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ + } + ++function create_data_hang() ++{ ++ prefix=$1 ++ mkdir ${master_mnt}/${prefix} ++ cd ${master_mnt}/${prefix} ++ # ~1k files is required with 1 sync-job and hang happens if ++ # stderr buffer of tar/ssh executed with Popen is full (i.e., 64k). ++ # 64k is hit when ~800 files were not found while syncing data ++ # from master. So around 1k files is required to hit the condition. ++ for i in {1..1000} ++ do ++ echo "test data" > file$i ++ mv -f file$i file ++ done ++ cd - ++} ++ + function chown_file_ok() + { + local file_owner=$(stat --format "%u:%g" "$1") +-- +1.8.3.1 + diff --git a/SOURCES/0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch b/SOURCES/0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch new file mode 100644 index 0000000..78fa34b --- /dev/null +++ b/SOURCES/0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch @@ -0,0 +1,165 @@ +From c2b1c50f06cc59b47c9c834617dff2aed7177a78 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Mon, 18 Mar 2019 12:54:54 +0530 +Subject: [PATCH 164/169] cluster/ec: Fix handling of heal info cases without + locks + +When we use heal info command, it takes lot of time as in +some cases it takes lock on entries to find out if the +entry actually needs heal or not. + +There are some cases where we can avoid these locks and +can conclude if the entry needs heal or not. + +1 - We do a lookup (without lock) on an entry, which we found in +.glusterfs/indices/xattrop, and find that lock count is +zero. Now if the file contains dirty bit set on all or any +brick, we can say that this entry needs heal. + +2 - If the lock count is one and dirty is greater than 1, +then it also means that some fop had left the dirty bit set +which made the dirty count of current fop (which has taken lock) +more than one. At this point also we can definitely say that +this entry needs heal. + +This patch is modifying code to take into consideration above two +points. 
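+
+To make the two rules concrete, here is a minimal Python sketch of the
+no-lock decision (purely illustrative: the function name is made up and
+the EC_HEAL_* strings stand in for the enum that _need_heal_calculate()
+below works with):
+
+    def need_heal_without_locks(dirty, lock_count):
+        # dirty is the per-brick dirty counter list read from the xattrs.
+        if lock_count == 0:
+            # Rule 1: nobody holds a lock; any dirty brick => heal needed.
+            return "EC_HEAL_MUST" if any(dirty) else "EC_HEAL_NONEED"
+        if lock_count == 1 and any(d > 1 for d in dirty):
+            # Rule 2: only our lookup holds a lock, yet some fop left an
+            # extra dirty increment behind => heal definitely needed.
+            return "EC_HEAL_MUST"
+        # Anything else cannot be decided without taking locks.
+        return "EC_HEAL_MAYBE"
+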
+It is also changing code to not to call ec_heal_inspect if ec_heal_do +was called from client side heal. Client side heal triggeres heal +only when it is sure that it requires heal. + +[We have changed the code to not to call heal for lookup] + +Upstream patch - +https://review.gluster.org/#/c/glusterfs/+/22372/ + +Fixes: bz#1716385 +Change-Id: I7f09f0ecd12f65a353297aefd57026fd2bebdf9c +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172579 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 42 ++++++++++++++++------------------------ + 1 file changed, 17 insertions(+), 25 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 3aa04fb..2fa1f11 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2541,13 +2541,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + + /* Mount triggers heal only when it detects that it must need heal, shd + * triggers heals periodically which need not be thorough*/ +- ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, +- !ec->shd.iamshd, &need_heal); ++ if (ec->shd.iamshd) { ++ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, ++ &need_heal); + +- if (need_heal == EC_HEAL_NONEED) { +- gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, +- "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +- goto out; ++ if (need_heal == EC_HEAL_NONEED) { ++ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, ++ "Heal is not required for : %s ", uuid_utoa(loc->gfid)); ++ goto out; ++ } + } + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); +@@ -2902,7 +2904,7 @@ out: + static int32_t + _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + gf_boolean_t self_locked, int32_t lock_count, +- ec_heal_need_t *need_heal) ++ ec_heal_need_t *need_heal, uint64_t *versions) + { + int i = 0; + int source_count = 0; +@@ -2912,7 +2914,7 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + *need_heal = EC_HEAL_NONEED; + if (self_locked || lock_count == 0) { + for (i = 0; i < ec->nodes; i++) { +- if (dirty[i]) { ++ if (dirty[i] || (versions[i] != versions[0])) { + *need_heal = EC_HEAL_MUST; + goto out; + } +@@ -2928,6 +2930,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + *need_heal = EC_HEAL_MUST; + goto out; + } ++ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) { ++ *need_heal = EC_HEAL_MAYBE; ++ } + } + } + } else { +@@ -2948,7 +2953,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + unsigned char *healed_sinks = NULL; + uint64_t *meta_versions = NULL; + int ret = 0; +- int i = 0; + + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); +@@ -2961,15 +2965,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + } + + ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count, +- need_heal); +- if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) { +- for (i = 1; i < ec->nodes; i++) { +- if (meta_versions[i] != meta_versions[0]) { +- *need_heal = EC_HEAL_MUST; +- goto out; +- } +- } +- } ++ need_heal, meta_versions); + out: + return ret; + } +@@ -3005,7 +3001,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + } + + ret = _need_heal_calculate(ec, dirty, sources, self_locked, 
lock_count, +- need_heal); ++ need_heal, data_versions); + out: + return ret; + } +@@ -3033,7 +3029,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies, + } + + ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count, +- need_heal); ++ need_heal, data_versions); + out: + return ret; + } +@@ -3131,10 +3127,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode, + need_heal: + ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough, + need_heal); +- +- if (!self_locked && *need_heal == EC_HEAL_MUST) { +- *need_heal = EC_HEAL_MAYBE; +- } + out: + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); +@@ -3220,7 +3212,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp) + + ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false, + _gf_false, &need_heal); +- if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) { ++ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) { + goto set_heal; + } + need_heal = EC_HEAL_NONEED; +-- +1.8.3.1 + diff --git a/SOURCES/0165-tests-shd-Add-test-coverage-for-shd-mux.patch b/SOURCES/0165-tests-shd-Add-test-coverage-for-shd-mux.patch new file mode 100644 index 0000000..5398a18 --- /dev/null +++ b/SOURCES/0165-tests-shd-Add-test-coverage-for-shd-mux.patch @@ -0,0 +1,442 @@ +From b7f832288d2d2e57231d90765afc049ad7cb2f9d Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 9 May 2019 14:07:48 +0530 +Subject: [PATCH 165/169] tests/shd: Add test coverage for shd mux + +This patch add more test cases for shd mux test cases +The test case includes +1) Createing multiple volumes to check the attach and detach + of self heal daemon requests. +2) Make sure the healing happens in all sceanarios +3) After a volume detach make sure the threads of the detached + volume is all cleaned. +4) Repeat all the above tests for ec volume +5) Node Reboot case +6) glusterd restart cases +7) Add-brick/remove brick +8) Convert a distributed volume to disperse volume +9) Convert a replicated volume to distributed volume + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22697/ + +>Change-Id: I7c317ef9d23a45ffd831157e4890d7c83a8fce7b +>fixes: bz#1708929 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: Ie732ead9413bd32b8c262303468a0720538334fb +BUG: 1704562 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172634 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/glusterd-restart-shd-mux.t | 96 +++++++++++++++++++++ + tests/basic/shd-mux.t | 149 +++++++++++++++++++++++++++++++++ + tests/basic/volume-scale-shd-mux.t | 112 +++++++++++++++++++++++++ + tests/volume.rc | 15 ++++ + 4 files changed, 372 insertions(+) + create mode 100644 tests/basic/glusterd-restart-shd-mux.t + create mode 100644 tests/basic/shd-mux.t + create mode 100644 tests/basic/volume-scale-shd-mux.t + +diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t +new file mode 100644 +index 0000000..a50af9d +--- /dev/null ++++ b/tests/basic/glusterd-restart-shd-mux.t +@@ -0,0 +1,96 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++cleanup; ++ ++TESTS_EXPECTED_IN_LOOP=20 ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} ++TEST $CLI volume set $V0 cluster.background-self-heal-count 0 ++TEST $CLI volume set $V0 cluster.eager-lock off ++TEST $CLI volume set $V0 performance.flush-behind off ++TEST $CLI volume start $V0 ++ ++for i in $(seq 1 3); do ++ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} ++ TEST $CLI volume start ${V0}_afr$i ++ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} ++ TEST $CLI volume start ${V0}_ec$i ++done ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++ ++#Stop the glusterd ++TEST pkill glusterd ++#Only stopping glusterd, so there will be one shd ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count ++TEST glusterd ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++#Check the thread count become to number of volumes*number of ec subvolume (3*6=18) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++#Check the thread count become to number of volumes*number of afr subvolume (4*6=24) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++shd_pid=$(get_shd_mux_pid $V0) ++for i in $(seq 1 3); do ++ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid" ++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path ++ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid" ++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path ++done ++ ++#Reboot a node scenario ++TEST pkill gluster ++#Only stopped glusterd, so there will be one shd ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count ++ ++TEST glusterd ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++ ++#Check the thread count become to number of volumes*number of ec subvolume (3*6=18) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++#Check the thread count become to number of volumes*number of afr subvolume (4*6=24) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++shd_pid=$(get_shd_mux_pid $V0) ++for i in $(seq 1 3); do ++ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid" ++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path ++ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid" ++ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path ++done ++ ++for i in $(seq 1 3); do ++ TEST $CLI volume stop ${V0}_afr$i ++ TEST $CLI volume stop ${V0}_ec$i ++done ++ ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++ ++TEST touch $M0/foo{1..100} ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0 ++ ++TEST $CLI volume start ${V0} force ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++TEST rm -rf $M0/* ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++ ++TEST $CLI volume stop ${V0} ++TEST $CLI volume delete ${V0} ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count ++ ++cleanup +diff --git a/tests/basic/shd-mux.t b/tests/basic/shd-mux.t +new file mode 100644 +index 0000000..e42a34a +--- /dev/null ++++ b/tests/basic/shd-mux.t +@@ -0,0 +1,149 @@ ++#!/bin/bash ++ ++. 
$(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup; ++ ++TESTS_EXPECTED_IN_LOOP=16 ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} ++TEST $CLI volume set $V0 cluster.background-self-heal-count 0 ++TEST $CLI volume set $V0 cluster.eager-lock off ++TEST $CLI volume set $V0 performance.flush-behind off ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++shd_pid=$(get_shd_mux_pid $V0) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++#Create a one more volume ++TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5} ++TEST $CLI volume start ${V0}_1 ++ ++#Check whether the shd has multiplexed or not ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0} ++ ++TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0 ++TEST $CLI volume set ${V0}_1 cluster.eager-lock off ++TEST $CLI volume set ${V0}_1 performance.flush-behind off ++TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10 ++TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14 ++ ++TEST touch $M0/foo{1..100} ++TEST touch $M1/foo{1..100} ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1 ++ ++TEST $CLI volume start ${V0} force ++TEST $CLI volume start ${V0}_1 force ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1 ++ ++TEST rm -rf $M0/* ++TEST rm -rf $M1/* ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 ++ ++#Stop the volume ++TEST $CLI volume stop ${V0}_1 ++TEST $CLI volume delete ${V0}_1 ++ ++#Check the stop succeeded and detached the volume with out restarting it ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0 ++ ++#Check the thread count become to earlier number after stopping ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++ ++#Now create a ec volume and check mux works ++TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5} ++TEST $CLI volume start ${V0}_2 ++ ++#Check whether the shd has multiplexed or not ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0} ++ ++TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0 ++TEST $CLI volume set ${V0}_2 cluster.eager-lock off ++TEST $CLI volume set ${V0}_2 performance.flush-behind off ++TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20 ++TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22 ++ ++TEST touch $M0/foo{1..100} ++TEST touch $M1/foo{1..100} ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2 ++ ++TEST $CLI volume start ${V0} force ++TEST $CLI volume start ${V0}_2 force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" 
number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2 ++ ++TEST rm -rf $M0/* ++TEST rm -rf $M1/* ++ ++ ++#Stop the volume ++TEST $CLI volume stop ${V0}_2 ++TEST $CLI volume delete ${V0}_2 ++ ++#Check the stop succeeded and detached the volume with out restarting it ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0 ++ ++#Check the thread count become to zero for ec related threads ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++#Check the thread count become to earlier number after stopping ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++for i in $(seq 1 3); do ++ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} ++ TEST $CLI volume start ${V0}_afr$i ++ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} ++ TEST $CLI volume start ${V0}_ec$i ++done ++ ++#Check the thread count become to number of volumes*number of ec subvolume (3*6=18) ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++#Check the thread count become to number of volumes*number of afr subvolume (4*6=24) ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++#Delete the volumes ++for i in $(seq 1 3); do ++ TEST $CLI volume stop ${V0}_afr$i ++ TEST $CLI volume stop ${V0}_ec$i ++ TEST $CLI volume delete ${V0}_afr$i ++ TEST $CLI volume delete ${V0}_ec$i ++done ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++ ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++TEST $CLI volume stop ${V0} ++TEST $CLI volume delete ${V0} ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count ++ ++cleanup +diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t +new file mode 100644 +index 0000000..dd9cf83 +--- /dev/null ++++ b/tests/basic/volume-scale-shd-mux.t +@@ -0,0 +1,112 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++cleanup; ++ ++TESTS_EXPECTED_IN_LOOP=6 ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} ++TEST $CLI volume set $V0 cluster.background-self-heal-count 0 ++TEST $CLI volume set $V0 cluster.eager-lock off ++TEST $CLI volume set $V0 performance.flush-behind off ++TEST $CLI volume start $V0 ++ ++for i in $(seq 1 2); do ++ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} ++ TEST $CLI volume start ${V0}_afr$i ++ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} ++ TEST $CLI volume start ${V0}_ec$i ++done ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++#Check the thread count become to number of volumes*number of ec subvolume (2*6=12) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++#Check the thread count become to number of volumes*number of afr subvolume (3*6=18) ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8}; ++#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21) ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++#Remove the brick and check the detach is successful ++$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5}; ++#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18) ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++ ++#Remove the brick and check the detach is successful ++$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" ++ ++ ++for i in $(seq 1 2); do ++ TEST $CLI volume stop ${V0}_afr$i ++ TEST $CLI volume stop ${V0}_ec$i ++done ++ ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++ ++TEST touch $M0/foo{1..100} ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0 ++ ++TEST $CLI volume start ${V0} force ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++TEST rm -rf $M0/* ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++shd_pid=$(get_shd_mux_pid $V0) ++TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10 ++TEST $CLI volume start ${V0}_distribute1 ++ ++#Creating a non-replicate/non-ec volume should not have any effect in shd ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++EXPECT "^${shd_pid}$" get_shd_mux_pid $V0 ++ ++TEST mkdir $B0/add/ ++#Now convert the distributed volume to replicate ++TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3} ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 
"__afr_shd_healer_wait" ++ ++#scale down the volume ++TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" ++ ++TEST $CLI volume stop ${V0} ++TEST $CLI volume delete ${V0} ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count ++ ++TEST rm -rf $B0/add/ ++TEST mkdir $B0/add/ ++#Now convert the distributed volume back to replicate and make sure that a new shd is spawned ++TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count ++EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "__afr_shd_healer_wait" ++ ++#Now convert the replica volume to distribute again and make sure the shd is now stopped ++TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force ++TEST rm -rf $B0/add/ ++ ++EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count ++ ++cleanup +diff --git a/tests/volume.rc b/tests/volume.rc +index a0ea3b8..bb400cc 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -912,3 +912,18 @@ function volgen_check_ancestry { + echo "N" + fi + } ++ ++function get_shd_mux_pid { ++ local volume=$1 ++ pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'` ++ echo $pid ++} ++ ++function shd_count { ++ ps aux | grep "glustershd" | grep -v grep | wc -l ++} ++ ++function number_healer_threads_shd { ++ local pid=$(get_shd_mux_pid $1) ++ pstack $pid | grep $2 | wc -l ++} +-- +1.8.3.1 + diff --git a/SOURCES/0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch b/SOURCES/0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch new file mode 100644 index 0000000..3aa441d --- /dev/null +++ b/SOURCES/0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch @@ -0,0 +1,92 @@ +From 79fff98f9ca5f815cf0227312b9a997d555dad29 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Wed, 22 May 2019 13:32:23 +0530 +Subject: [PATCH 166/169] glusterd/svc: glusterd_svcs_stop should call + individual wrapper function + +glusterd_svcs_stop should call individual wrapper function to stop a +daemon rather than calling glusterd_svc_stop. For example for shd, +it should call glusterd_shdsvc_stop instead of calling basic API +function to stop. Because the individual functions for each daemon +could be doing some specific operation in their wrapper function. 
+ +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22761/ + +>Change-Id: Ie6d40590251ad470ef3901d1141ab7b22c3498f5 +>fixes: bz#1712741 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I6df03e53f08c337d5d9b0e855a0b77894a2aacc9 +BUG: 1716865 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172288 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 12 ++++++++++-- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 10 +++++----- + 2 files changed, 15 insertions(+), 7 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 75f9a07..981cc87 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -656,10 +656,18 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) + int pid = -1; + + conf = THIS->private; ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + svc_proc = svc->svc_proc; +- GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out); +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); ++ if (!svc_proc) { ++ /* ++ * This can happen when stop was called on a volume that is not shd ++ * compatible. ++ */ ++ gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped"); ++ ret = 0; ++ goto out; ++ } + + /* Get volinfo->shd from svc object */ + shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index f7be394..6a3ca52 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -86,25 +86,25 @@ glusterd_svcs_stop(glusterd_volinfo_t *volinfo) + priv = this->private; + GF_ASSERT(priv); + +- ret = glusterd_svc_stop(&(priv->nfs_svc), SIGKILL); ++ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL); + if (ret) + goto out; + +- ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); ++ ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM); + if (ret) + goto out; + + if (volinfo) { +- ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT); ++ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM); + if (ret) + goto out; + } + +- ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM); ++ ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM); + if (ret) + goto out; + +- ret = glusterd_svc_stop(&(priv->scrub_svc), SIGTERM); ++ ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM); + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch b/SOURCES/0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch new file mode 100644 index 0000000..44e971a --- /dev/null +++ b/SOURCES/0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch @@ -0,0 +1,64 @@ +From 321080e55f0ae97115a9542ba5de8494e7610860 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 14 May 2019 23:12:44 +0530 +Subject: [PATCH 167/169] glusterd/shd: Optimize the glustershd manager to send + reconfigure + +Traditionally all svc manager will execute process stop and then +followed by start each time when they called. 
But that is not +required by shd, because the attach request implemented in the shd +multiplex has the intelligence to check whether a detach is required +prior to attaching the graph. So there is no need to send an explicit +detach request if we are sure that the next call is an attach request + +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22729/ +>Change-Id: I9157c8dcaffdac038f73286bcf5646a3f1d3d8ec +>fixes: bz#1710054 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I56aaaf3b4d28215307e160c1ba0e09bb74c30fbe +BUG: 1716865 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172289 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/graph.c | 1 - + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 9 +++++---- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 18fb2d9..27d9335 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1497,7 +1497,6 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); + parent_graph->xl_count -= graph->xl_count; + parent_graph->leaf_count -= graph->leaf_count; +- default_notify(xl, GF_EVENT_PARENT_DOWN, xl); + parent_graph->id++; + ret = 0; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 981cc87..d81d760 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -311,10 +311,11 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + */ + ret = svc->stop(svc, SIGTERM); + } else if (volinfo) { +- ret = svc->stop(svc, SIGTERM); +- if (ret) +- goto out; +- ++ if (volinfo->status != GLUSTERD_STATUS_STARTED) { ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) ++ goto out; ++ } + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = svc->start(svc, flags); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0168-cluster-dht-Fix-directory-perms-during-selfheal.patch b/SOURCES/0168-cluster-dht-Fix-directory-perms-during-selfheal.patch new file mode 100644 index 0000000..3cf8dff --- /dev/null +++ b/SOURCES/0168-cluster-dht-Fix-directory-perms-during-selfheal.patch @@ -0,0 +1,46 @@ +From 6198461bce7d264b71fe91e981aa3af3a19a8abe Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Tue, 4 Jun 2019 14:51:44 +0530 +Subject: [PATCH 168/169] cluster/dht: Fix directory perms during selfheal + +Fixed a bug in the revalidate code path that wiped out +directory permissions if no mds subvol was found. 
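+
+The shape of the fix is a simple guard; a hypothetical Python sketch
+(dictionary-shaped stat buffers and invented names) of the idea:
+
+    def pick_selfheal_attrs(stbuf, mds_stbuf, mds_subvol):
+        # Only take ownership/permissions from the MDS reply when an MDS
+        # subvol actually answered; otherwise the zeroed mds_stbuf would
+        # wipe out uid/gid and the permission bits.
+        if mds_subvol is not None:
+            stbuf["uid"] = mds_stbuf["uid"]
+            stbuf["gid"] = mds_stbuf["gid"]
+            stbuf["prot"] = mds_stbuf["prot"]
+        return stbuf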
+ +upstream: https://review.gluster.org/#/c/glusterfs/+/22813/ + +> Change-Id: I8b4239ffee7001493c59d4032a2d3062586ea115 +> fixes: bz#1716830 +> Signed-off-by: N Balachandran <nbalacha@redhat.com> + +BUG: 1716821 +Change-Id: I6d84d381d07a27d1ef9113a2104a62ceaf2110e3 +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172622 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 183872f..e1edb38 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1739,9 +1739,11 @@ unlock: + + if (dht_needs_selfheal(frame, this)) { + if (!__is_root_gfid(local->loc.inode->gfid)) { +- local->stbuf.ia_gid = local->mds_stbuf.ia_gid; +- local->stbuf.ia_uid = local->mds_stbuf.ia_uid; +- local->stbuf.ia_prot = local->mds_stbuf.ia_prot; ++ if (local->mds_subvol) { ++ local->stbuf.ia_gid = local->mds_stbuf.ia_gid; ++ local->stbuf.ia_uid = local->mds_stbuf.ia_uid; ++ local->stbuf.ia_prot = local->mds_stbuf.ia_prot; ++ } + } else { + local->stbuf.ia_gid = local->prebuf.ia_gid; + local->stbuf.ia_uid = local->prebuf.ia_uid; +-- +1.8.3.1 + diff --git a/SOURCES/0169-Build-Fix-spec-to-enable-rhel8-client-build.patch b/SOURCES/0169-Build-Fix-spec-to-enable-rhel8-client-build.patch new file mode 100644 index 0000000..294574d --- /dev/null +++ b/SOURCES/0169-Build-Fix-spec-to-enable-rhel8-client-build.patch @@ -0,0 +1,62 @@ +From 2e6241a800c98ba95b3420255d8089e0271b46eb Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Thu, 6 Jun 2019 16:18:26 +0530 +Subject: [PATCH 169/169] Build: Fix spec to enable rhel8 client build + +Updated the spec file with required changes to enable RHGS RHEL8 +client build. As Ganesha scripts are not python3 compatible, we +will not be generating RHGS RHEL8 server build until the required +changes are backported from upstream. 
+ +Label : DOWNSTREAM ONLY + +BUG: 1717927 +Change-Id: I2a8d37d24405a8b2d5533ebf7b85327485f810d7 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172668 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 85e75f2..9c7d7a7 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -91,7 +91,7 @@ + + # disable server components forcefully as rhel <= 6 + %if ( 0%{?rhel} ) +-%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ))) ++%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) || ( "%{?dist}" == ".el8rhgs" ))) + %global _without_server --without-server + %endif + %endif +@@ -270,7 +270,7 @@ BuildRequires: python%{_pythonver}-devel + %if ( 0%{?rhel} && 0%{?rhel} < 8 ) + BuildRequires: python-ctypes + %endif +-%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} ) ++%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} ) || ( 0%{?rhel} && ( 0%{?rhel} >= 8 ) ) + BuildRequires: libtirpc-devel + %endif + %if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 ) +@@ -722,12 +722,10 @@ GlusterFS Events + + %prep + %setup -q -n %{name}-%{version}%{?prereltag} +-%if ( ! %{_usepython3} ) + echo "fixing python shebangs..." +-for f in api events extras geo-replication libglusterfs tools xlators; do +-find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \; ++for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do ++ sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i + done +-%endif + + %build + +-- +1.8.3.1 + diff --git a/SOURCES/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch b/SOURCES/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch new file mode 100644 index 0000000..a7ea429 --- /dev/null +++ b/SOURCES/0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch @@ -0,0 +1,119 @@ +From c4e292379928eaf1ebb47ee1c8e9b1eabbe90574 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Tue, 14 May 2019 11:05:45 +0530 +Subject: [PATCH 170/178] geo-rep: Convert gfid conflict resolutiong logs into + debug + +The gfid conflict resolution code path is not supposed +to hit in generic code path. But few of the heavy rename +workload (BUG: 1694820) makes it a generic case. So +logging the entries to be fixed as INFO floods the log +in these particular workloads. Hence convert them to DEBUG. 
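+
+The resulting logging pattern, sketched in Python (the helper name is
+hypothetical and the lf() structured formatting is elided):
+
+    import logging
+
+    def fix_entry_failures(failures):
+        if failures:
+            # One aggregate INFO line per batch instead of one per entry.
+            logging.info("Entry ops failed with gfid mismatch, count=%d",
+                         len(failures))
+        for failure in failures:
+            # Per-entry details only show up when debug logging is on.
+            logging.debug("Fixing gfid mismatch in slave, entry=%r", failure)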
+ +Backport of: + > Patch: https://review.gluster.org/22720 + > fixes: bz#1709653 + > Change-Id: I4d5e102b87be5fe5b54f78f329e588882d72b9d9 + +BUG: 1714536 +Change-Id: I4d5e102b87be5fe5b54f78f329e588882d72b9d9 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172731 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/syncdaemon/master.py | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 42c86d7..3f98337 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -811,7 +811,7 @@ class GMasterChangelogMixin(GMasterCommon): + st = lstat(os.path.join(pfx, slave_gfid)) + # Takes care of scenarios with no hardlinks + if isinstance(st, int) and st == ENOENT: +- logging.info(lf('Entry not present on master. Fixing gfid ' ++ logging.debug(lf('Entry not present on master. Fixing gfid ' + 'mismatch in slave. Deleting the entry', + retry_count=retry_count, + entry=repr(failure))) +@@ -843,7 +843,7 @@ class GMasterChangelogMixin(GMasterCommon): + if matching_disk_gfid(slave_gfid, pbname): + # Safe to ignore the failure as master contains same + # file with same gfid. Remove entry from entries list +- logging.info(lf('Fixing gfid mismatch in slave. ' ++ logging.debug(lf('Fixing gfid mismatch in slave. ' + ' Safe to ignore, take out entry', + retry_count=retry_count, + entry=repr(failure))) +@@ -865,14 +865,14 @@ class GMasterChangelogMixin(GMasterCommon): + dst_entry = os.path.join(pfx, realpath.split('/')[-2], + realpath.split('/')[-1]) + src_entry = pbname +- logging.info(lf('Fixing dir name/gfid mismatch in ' ++ logging.debug(lf('Fixing dir name/gfid mismatch in ' + 'slave', retry_count=retry_count, + entry=repr(failure))) + if src_entry == dst_entry: + # Safe to ignore the failure as master contains + # same directory as in slave with same gfid. + # Remove the failure entry from entries list +- logging.info(lf('Fixing dir name/gfid mismatch' ++ logging.debug(lf('Fixing dir name/gfid mismatch' + ' in slave. Safe to ignore, ' + 'take out entry', + retry_count=retry_count, +@@ -886,7 +886,7 @@ class GMasterChangelogMixin(GMasterCommon): + entry=src_entry, + entry1=dst_entry, stat=st, + link=None) +- logging.info(lf('Fixing dir name/gfid mismatch' ++ logging.debug(lf('Fixing dir name/gfid mismatch' + ' in slave. Renaming', + retry_count=retry_count, + entry=repr(rename_dict))) +@@ -896,7 +896,7 @@ class GMasterChangelogMixin(GMasterCommon): + # renamed file exists and we are sure from + # matching_disk_gfid check that the entry doesn't + # exist with same gfid so we can safely delete on slave +- logging.info(lf('Fixing file gfid mismatch in slave. ' ++ logging.debug(lf('Fixing file gfid mismatch in slave. ' + 'Hardlink/Rename Case. Deleting entry', + retry_count=retry_count, + entry=repr(failure))) +@@ -915,7 +915,7 @@ class GMasterChangelogMixin(GMasterCommon): + # Safe to ignore the failure as master doesn't contain + # parent directory. + if isinstance(st, int): +- logging.info(lf('Fixing ENOENT error in slave. Parent ' ++ logging.debug(lf('Fixing ENOENT error in slave. Parent ' + 'does not exist on master. Safe to ' + 'ignore, take out entry', + retry_count=retry_count, +@@ -925,7 +925,7 @@ class GMasterChangelogMixin(GMasterCommon): + except ValueError: + pass + else: +- logging.info(lf('Fixing ENOENT error in slave. 
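+
+A rough Python equivalent of the priority rule (illustrative only; the
+reserve values are assumed to already be plain bytes here, while the
+option itself accepts suffixed values such as 10MB):
+
+    import os
+
+    def disk_space_full(path, reserve_size, reserve_percent):
+        st = os.statvfs(path)
+        if reserve_size:
+            reserved = reserve_size  # absolute reserve-size wins
+        else:
+            total = st.f_blocks * st.f_bsize
+            reserved = total * reserve_percent // 100
+        free = st.f_bfree * st.f_bsize
+        return free <= reserved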
Create ' ++ logging.debug(lf('Fixing ENOENT error in slave. Create ' + 'parent directory on slave.', + retry_count=retry_count, + entry=repr(failure))) +@@ -1223,10 +1223,13 @@ class GMasterChangelogMixin(GMasterCommon): + + if gconf.get("gfid-conflict-resolution"): + count = 0 ++ if failures: ++ logging.info(lf('Entry ops failed with gfid mismatch', ++ count=len(failures))) + while failures and count < self.MAX_OE_RETRIES: + count += 1 + self.handle_entry_failures(failures, entries) +- logging.info("Retry original entries. count = %s" % count) ++ logging.info(lf('Retry original entries', count=count)) + failures = self.slave.server.entry_ops(entries) + if not failures: + logging.info("Successfully fixed all entry ops with " +-- +1.8.3.1 + diff --git a/SOURCES/0171-posix-add-storage.reserve-size-option.patch b/SOURCES/0171-posix-add-storage.reserve-size-option.patch new file mode 100644 index 0000000..2e76908 --- /dev/null +++ b/SOURCES/0171-posix-add-storage.reserve-size-option.patch @@ -0,0 +1,295 @@ +From 4d82c7879387e6f7963b4d9c84c4ff8a1788055d Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <sheetal.pamecha08@gmail.com> +Date: Mon, 19 Nov 2018 22:15:25 +0530 +Subject: [PATCH 171/178] posix: add storage.reserve-size option + +storage.reserve-size option will take size as input +instead of percentage. If set, priority will be given to +storage.reserve-size over storage.reserve. Default value +of this option is 0. + +> fixes: bz#1651445 +> Change-Id: I7a7342c68e436e8bf65bd39c567512ee04abbcea +> Signed-off-by: Sheetal Pamecha <sheetal.pamecha08@gmail.com> +> Cherry pick from commit 950726dfc8e3171bef625b563c0c6dbba1ec2928 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21686/ + +BUG: 1573077 +Change-Id: I7a7342c68e436e8bf65bd39c567512ee04abbcea +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172709 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tests/bugs/posix/bug-1651445.t | 58 +++++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 33 ++++++++++++++ + xlators/storage/posix/src/posix-common.c | 34 ++++++++++++--- + xlators/storage/posix/src/posix-helpers.c | 13 ++++-- + xlators/storage/posix/src/posix-inode-fd-ops.c | 10 +++-- + xlators/storage/posix/src/posix.h | 3 +- + 6 files changed, 138 insertions(+), 13 deletions(-) + create mode 100644 tests/bugs/posix/bug-1651445.t + +diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t +new file mode 100644 +index 0000000..f6f1833 +--- /dev/null ++++ b/tests/bugs/posix/bug-1651445.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../snapshot.rc ++ ++cleanup ++ ++TEST verify_lvm_version ++TEST glusterd ++TEST pidof glusterd ++TEST init_n_bricks 3 ++TEST setup_lvm 3 ++ ++TEST $CLI volume create $V0 replica 3 $H0:$L{1,2,3} ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST $CLI volume set $V0 storage.reserve-size 10MB ++ ++#No effect as priority to reserve-size ++TEST $CLI volume set $V0 storage.reserve 20 ++ ++TEST dd if=/dev/zero of=$M0/a bs=100M count=1 ++sleep 5 ++ ++#Below dd confirms posix is giving priority to reserve-size ++TEST dd if=/dev/zero of=$M0/b bs=40M count=1 ++ ++sleep 5 ++TEST ! 
dd if=/dev/zero of=$M0/c bs=5M count=1 ++ ++rm -rf $M0/* ++#Size will reserve from the previously set reserve option = 20% ++TEST $CLI volume set $V0 storage.reserve-size 0 ++ ++#Overwrite reserve option ++TEST $CLI volume set $V0 storage.reserve-size 40MB ++ ++#wait 5s to reset disk_space_full flag ++sleep 5 ++ ++TEST dd if=/dev/zero of=$M0/a bs=100M count=1 ++TEST dd if=/dev/zero of=$M0/b bs=10M count=1 ++ ++# Wait 5s to update disk_space_full flag because thread check disk space ++# after every 5s ++ ++sleep 5 ++# setup_lvm create lvm partition of 150M and 40M are reserve so after ++# consuming more than 110M next dd should fail ++TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1 ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 7a83124..3a7ab83 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1231,6 +1231,30 @@ out: + + return ret; + } ++static int ++validate_size(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, ++ char **op_errstr) ++{ ++ xlator_t *this = NULL; ++ uint64_t size = 0; ++ int ret = -1; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ret = gf_string2bytesize_uint64(value, &size); ++ if (ret < 0) { ++ gf_asprintf(op_errstr, ++ "%s is not a valid size. %s " ++ "expects a valid value in bytes", ++ value, key); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", ++ *op_errstr); ++ } ++out: ++ gf_msg_debug("glusterd", 0, "Returning %d", ret); ++ ++ return ret; ++} + + /* dispatch table for VOLUME SET + * ----------------------------- +@@ -2830,6 +2854,15 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_13_0, + }, + { ++ .key = "storage.reserve-size", ++ .voltype = "storage/posix", ++ .value = "0", ++ .validate_fn = validate_size, ++ .description = "If set, priority will be given to " ++ "storage.reserve-size over storage.reserve", ++ .op_version = GD_OP_VERSION_7_0, ++ }, ++ { + .option = "health-check-timeout", + .key = "storage.health-check-timeout", + .type = NO_DOC, +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index ed82e35..0f70af5 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -345,11 +345,18 @@ posix_reconfigure(xlator_t *this, dict_t *options) + " fallback to <hostname>:<export>"); + } + +- GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out); +- if (priv->disk_reserve) { ++ GF_OPTION_RECONF("reserve-size", priv->disk_reserve_size, options, size, ++ out); ++ ++ GF_OPTION_RECONF("reserve", priv->disk_reserve_percent, options, uint32, ++ out); ++ if (priv->disk_reserve_size || priv->disk_reserve_percent) { + ret = posix_spawn_disk_space_check_thread(this); +- if (ret) ++ if (ret) { ++ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, ++ "Getting disk space check from thread failed"); + goto out; ++ } + } + + GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, +@@ -968,11 +975,17 @@ posix_init(xlator_t *this) + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; +- GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out); +- if (_private->disk_reserve) { ++ GF_OPTION_INIT("reserve-size", _private->disk_reserve_size, size, out); ++ ++ GF_OPTION_INIT("reserve", 
_private->disk_reserve_percent, uint32, out); ++ ++ if (_private->disk_reserve_size || _private->disk_reserve_percent) { + ret = posix_spawn_disk_space_check_thread(this); +- if (ret) ++ if (ret) { ++ gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, ++ "Getting disk space check from thread failed "); + goto out; ++ } + } + + _private->health_check_active = _gf_false; +@@ -1216,6 +1229,15 @@ struct volume_options posix_options[] = { + " Set to 0 to disable", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, ++ {.key = {"reserve-size"}, ++ .type = GF_OPTION_TYPE_SIZET, ++ .min = 0, ++ .default_value = "0", ++ .validate = GF_OPT_VALIDATE_MIN, ++ .description = "size in megabytes to be reserved for disk space." ++ " Set to 0 to disable", ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index d0fd45a..aecf4f8 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2246,6 +2246,7 @@ posix_disk_space_check(xlator_t *this) + struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; ++ uint64_t size = 0; + int percent = 0; + struct statvfs buf = {0}; + uint64_t totsz = 0; +@@ -2256,7 +2257,14 @@ posix_disk_space_check(xlator_t *this) + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + subvol_path = priv->base_path; +- percent = priv->disk_reserve; ++ ++ if (priv->disk_reserve_size) { ++ size = priv->disk_reserve_size; ++ } else { ++ percent = priv->disk_reserve_percent; ++ totsz = (buf.f_blocks * buf.f_bsize); ++ size = ((totsz * percent) / 100); ++ } + + op_ret = sys_statvfs(subvol_path, &buf); + +@@ -2265,10 +2273,9 @@ posix_disk_space_check(xlator_t *this) + "statvfs failed on %s", subvol_path); + goto out; + } +- totsz = (buf.f_blocks * buf.f_bsize); + freesz = (buf.f_bfree * buf.f_bsize); + +- if (freesz <= ((totsz * percent) / 100)) { ++ if (freesz <= size) { + priv->disk_space_full = 1; + } else { + priv->disk_space_full = 0; +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 2c19ce1..7ca4d26 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -720,7 +720,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + thread after every 5 sec sleep to working correctly storage.reserve + option behaviour + */ +- if (priv->disk_reserve) ++ if (priv->disk_reserve_size || priv->disk_reserve_percent) + posix_disk_space_check(this); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); +@@ -2331,8 +2331,12 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + goto out; + } + +- percent = priv->disk_reserve; +- reserved_blocks = (buf.f_blocks * percent) / 100; ++ if (priv->disk_reserve_size) { ++ reserved_blocks = priv->disk_reserve_size / buf.f_bsize; ++ } else { ++ percent = priv->disk_reserve_percent; ++ reserved_blocks = (buf.f_blocks * percent) / 100; ++ } + + if (buf.f_bfree > reserved_blocks) { + buf.f_bfree = (buf.f_bfree - reserved_blocks); +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 1da4d01..4364b96 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ 
-225,7 +225,8 @@ struct posix_private { + pthread_t health_check; + gf_boolean_t health_check_active; + +- uint32_t disk_reserve; ++ uint32_t disk_reserve_percent; ++ uint64_t disk_reserve_size; + uint32_t disk_space_full; + pthread_t disk_space_check; + gf_boolean_t disk_space_check_active; +-- +1.8.3.1 + diff --git a/SOURCES/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch b/SOURCES/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch new file mode 100644 index 0000000..0dc15ba --- /dev/null +++ b/SOURCES/0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch @@ -0,0 +1,141 @@ +From 998d9b8b5e271f407e1c654c34f45f0db36abc71 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 21 May 2019 17:15:07 +0530 +Subject: [PATCH 172/178] ec/fini: Fix race with ec_fini and ec_notify + +During a graph cleanup, we first sent a PARENT_DOWN and wait for +a child down to ultimately free the xlator and the graph. + +In the ec xlator, we cleanup the threads when we get a PARENT_DOWN event. +But a racing event like CHILD_UP or event xl_op may trigger healing threads +after threads cleanup. + +So there is a chance that the threads might access a freed private variabe + +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22758/ + +>Change-Id: I252d10181bb67b95900c903d479de707a8489532 +>fixes: bz#1703948 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I84a10352d9fb3e68d4147b3791e3af45ab79050e +BUG: 1703434 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172285 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 3 +++ + libglusterfs/src/libglusterfs.sym | 1 + + libglusterfs/src/xlator.c | 21 +++++++++++++++++++++ + xlators/cluster/ec/src/ec-heal.c | 4 ++++ + xlators/cluster/ec/src/ec-heald.c | 6 ++++++ + xlators/cluster/ec/src/ec.c | 3 +++ + 6 files changed, 38 insertions(+) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 8998976..09e463e 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1092,4 +1092,7 @@ gluster_graph_take_reference(xlator_t *tree); + + gf_boolean_t + mgmt_is_multiplexed_daemon(char *name); ++ ++gf_boolean_t ++xlator_is_cleanup_starting(xlator_t *this); + #endif /* _XLATOR_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index ec474e7..7a2edef 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1161,3 +1161,4 @@ glusterfs_process_svc_attach_volfp + glusterfs_mux_volfile_reconfigure + glusterfs_process_svc_detach + mgmt_is_multiplexed_daemon ++xlator_is_cleanup_starting +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 022c3ed..fbfbbe2 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1486,3 +1486,24 @@ mgmt_is_multiplexed_daemon(char *name) + } + return _gf_false; + } ++ ++gf_boolean_t ++xlator_is_cleanup_starting(xlator_t *this) ++{ ++ gf_boolean_t cleanup = _gf_false; ++ glusterfs_graph_t *graph = NULL; ++ xlator_t *xl = NULL; ++ ++ if (!this) ++ goto out; ++ graph = this->graph; ++ ++ if (!graph) ++ goto out; ++ ++ xl = graph->first; ++ if (xl && xl->cleanup_starting) ++ cleanup = _gf_true; ++out: ++ return cleanup; ++} +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 2fa1f11..8844c29 100644 +--- 
a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2855,6 +2855,10 @@ ec_replace_brick_heal_wrap(void *opaque) + itable = ec->xl->itable; + else + goto out; ++ ++ if (xlator_is_cleanup_starting(ec->xl)) ++ goto out; ++ + ret = ec_replace_heal(ec, itable->root); + out: + return ret; +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index edf5e11..91512d7 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -444,6 +444,9 @@ unlock: + int + ec_shd_full_healer_spawn(xlator_t *this, int subvol) + { ++ if (xlator_is_cleanup_starting(this)) ++ return -1; ++ + return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol), + ec_shd_full_healer); + } +@@ -451,6 +454,9 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol) + int + ec_shd_index_healer_spawn(xlator_t *this, int subvol) + { ++ if (xlator_is_cleanup_starting(this)) ++ return -1; ++ + return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol), + ec_shd_index_healer); + } +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 264582a..df5912c 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -486,6 +486,9 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state) + { + uintptr_t current_state = 0; + ++ if (xlator_is_cleanup_starting(ec->xl)) ++ return _gf_false; ++ + if ((ec->xl_notify & index_mask) == 0) { + ec->xl_notify |= index_mask; + ec->xl_notify_count++; +-- +1.8.3.1 + diff --git a/SOURCES/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch b/SOURCES/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch new file mode 100644 index 0000000..bdaaac5 --- /dev/null +++ b/SOURCES/0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch @@ -0,0 +1,52 @@ +From c001b60047c73e07f42ee858dd8ae19136ecd61b Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 6 Jun 2019 13:19:29 +0530 +Subject: [PATCH 173/178] glusterd: store fips-mode-rchecksum option in the + info file + +commit 146e4b45d0ce906ae50fd6941a1efafd133897ea enabled +storage.fips-mode-rchecksum option for all new volumes with op-version +>=GD_OP_VERSION_7_0 but `gluster vol get $volname +storage.fips-mode-rchecksum` was displaying it as 'off'. This patch fixes it. 
+ +>upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22830/ + +>fixes: bz#1717782 +>Change-Id: Ie09f89838893c5776a3f60569dfe8d409d1494dd +>Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +BUG: 1715407 +Change-Id: Ie09f89838893c5776a3f60569dfe8d409d1494dd +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172799 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2bc4836..7768b8e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13124,6 +13124,17 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + } + } ++ if (conf->op_version >= GD_OP_VERSION_7_0) { ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, ++ "storage.fips-mode-rchecksum", "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option 'storage.fips-mode-rchecksum' " ++ "on volume %s", ++ volinfo->volname); ++ goto out; ++ } ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch b/SOURCES/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch new file mode 100644 index 0000000..66c5ff4 --- /dev/null +++ b/SOURCES/0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch @@ -0,0 +1,53 @@ +From 9b94397a5a735910fab2a29670146a1feb6d890e Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 4 Jun 2019 11:13:50 +0530 +Subject: [PATCH 174/178] xlator/log: Add more logging in + xlator_is_cleanup_starting + +This patch will add two extra logs for invalid argument + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22810/ + +>Change-Id: I3950b4f4b9d88b1f1e788ef93d8f09d4bd8d4d8b +>updates: bz#1703948 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I3950b4f4b9d88b1f1e788ef93d8f09d4bd8d4d8b +BUG: 1703434 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172800 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/xlator.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index fbfbbe2..71e1ed4 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1494,12 +1494,18 @@ xlator_is_cleanup_starting(xlator_t *this) + glusterfs_graph_t *graph = NULL; + xlator_t *xl = NULL; + +- if (!this) ++ if (!this) { ++ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "xlator object is null, returning false"); + goto out; +- graph = this->graph; ++ } + +- if (!graph) ++ graph = this->graph; ++ if (!graph) { ++ gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "Graph is not set for xlator %s", this->name); + goto out; ++ } + + xl = graph->first; + if (xl && xl->cleanup_starting) +-- +1.8.3.1 + diff --git a/SOURCES/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch b/SOURCES/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch new file mode 100644 index 0000000..c13d96d --- /dev/null +++ 
b/SOURCES/0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch @@ -0,0 +1,241 @@ +From 9fd966aa6879ac9867381629f82eca24b950d731 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Sun, 2 Jun 2019 01:36:33 +0530 +Subject: [PATCH 175/178] ec/fini: Fix race between xlator cleanup and on going + async fop + +Problem: +While we process a cleanup, there is a chance for a race between +async operations, for example ec_launch_replace_heal. So this can +lead to invalid mem access. + +Solution: +Just like we track on going heal fops, we can also track fops like +ec_launch_replace_heal, so that we can decide when to send a +PARENT_DOWN request. + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22798/ + +>Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298 +>fixes: bz#1703948 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298 +BUG: 1714588 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172801 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 10 ++++++++++ + xlators/cluster/ec/src/ec-common.h | 2 ++ + xlators/cluster/ec/src/ec-data.c | 4 +++- + xlators/cluster/ec/src/ec-heal.c | 17 +++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 1 + + xlators/cluster/ec/src/ec.c | 37 +++++++++++++++++++++++++------------ + 6 files changed, 56 insertions(+), 15 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index e85aa8b..9cc6395 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2955,3 +2955,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error) + + __ec_manager(fop, error); + } ++ ++gf_boolean_t ++__ec_is_last_fop(ec_t *ec) ++{ ++ if ((list_empty(&ec->pending_fops)) && ++ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) { ++ return _gf_true; ++ } ++ return _gf_false; ++} +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index e948342..bf6c97d 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -204,4 +204,6 @@ void + ec_reset_entry_healing(ec_fop_data_t *fop); + char * + ec_msg_str(ec_fop_data_t *fop); ++gf_boolean_t ++__ec_is_last_fop(ec_t *ec); + #endif /* __EC_COMMON_H__ */ +diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c +index 6ef9340..8d2d9a1 100644 +--- a/xlators/cluster/ec/src/ec-data.c ++++ b/xlators/cluster/ec/src/ec-data.c +@@ -202,11 +202,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify) + { + ec_t *ec = fop->xl->private; + ++ *notify = _gf_false; ++ + if (!list_empty(&fop->pending_list)) { + LOCK(&ec->lock); + { + list_del_init(&fop->pending_list); +- *notify = list_empty(&ec->pending_fops); ++ *notify = __ec_is_last_fop(ec); + } + UNLOCK(&ec->lock); + } +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 8844c29..237fea2 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2814,8 +2814,20 @@ int + ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque) + { + ec_t *ec = opaque; ++ gf_boolean_t last_fop = _gf_false; + ++ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) { ++ LOCK(&ec->lock); ++ { ++ last_fop = __ec_is_last_fop(ec); ++ } ++ UNLOCK(&ec->lock); ++ } + gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is 
done ret %d", ret); ++ ++ if (last_fop) ++ ec_pending_fops_completed(ec); ++ + return 0; + } + +@@ -2869,14 +2881,15 @@ ec_launch_replace_heal(ec_t *ec) + { + int ret = -1; + +- if (!ec) +- return ret; + ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap, + ec_replace_heal_done, NULL, ec); ++ + if (ret < 0) { + gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d", + ret); ++ ec_replace_heal_done(-1, NULL, ec); + } ++ + return ret; + } + +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 1c295c0..4dbf4a3 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -643,6 +643,7 @@ struct _ec { + uintptr_t xl_notify; /* Bit flag representing + notification for bricks. */ + uintptr_t node_mask; ++ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */ + xlator_t **xl_list; + gf_lock_t lock; + gf_timer_t *timer; +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index df5912c..f0d58c0 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -355,6 +355,7 @@ ec_notify_cbk(void *data) + ec_t *ec = data; + glusterfs_event_t event = GF_EVENT_MAXVAL; + gf_boolean_t propagate = _gf_false; ++ gf_boolean_t launch_heal = _gf_false; + + LOCK(&ec->lock); + { +@@ -384,6 +385,11 @@ ec_notify_cbk(void *data) + * still bricks DOWN, they will be healed when they + * come up. */ + ec_up(ec->xl, ec); ++ ++ if (ec->shd.iamshd && !ec->shutdown) { ++ launch_heal = _gf_true; ++ GF_ATOMIC_INC(ec->async_fop_count); ++ } + } + + propagate = _gf_true; +@@ -391,13 +397,12 @@ ec_notify_cbk(void *data) + unlock: + UNLOCK(&ec->lock); + ++ if (launch_heal) { ++ /* We have just brought the volume UP, so we trigger ++ * a self-heal check on the root directory. */ ++ ec_launch_replace_heal(ec); ++ } + if (propagate) { +- if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) { +- /* We have just brought the volume UP, so we trigger +- * a self-heal check on the root directory. */ +- ec_launch_replace_heal(ec); +- } +- + default_notify(ec->xl, event, NULL); + } + } +@@ -425,7 +430,7 @@ ec_disable_delays(ec_t *ec) + { + ec->shutdown = _gf_true; + +- return list_empty(&ec->pending_fops); ++ return __ec_is_last_fop(ec); + } + + void +@@ -603,7 +608,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + if (event == GF_EVENT_CHILD_UP) { + /* We need to trigger a selfheal if a brick changes + * to UP state. 
*/ +- needs_shd_check = ec_set_up_state(ec, mask, mask); ++ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd && ++ !ec->shutdown) { ++ needs_shd_check = _gf_true; ++ } + } else if (event == GF_EVENT_CHILD_DOWN) { + ec_set_up_state(ec, mask, 0); + } +@@ -633,17 +641,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + } + } else { + propagate = _gf_false; ++ needs_shd_check = _gf_false; ++ } ++ ++ if (needs_shd_check) { ++ GF_ATOMIC_INC(ec->async_fop_count); + } + } + unlock: + UNLOCK(&ec->lock); + + done: ++ if (needs_shd_check) { ++ ec_launch_replace_heal(ec); ++ } + if (propagate) { +- if (needs_shd_check && ec->shd.iamshd) { +- ec_launch_replace_heal(ec); +- } +- + error = default_notify(this, event, data); + } + +@@ -705,6 +717,7 @@ init(xlator_t *this) + ec->xl = this; + LOCK_INIT(&ec->lock); + ++ GF_ATOMIC_INIT(ec->async_fop_count, 0); + INIT_LIST_HEAD(&ec->pending_fops); + INIT_LIST_HEAD(&ec->heal_waiting); + INIT_LIST_HEAD(&ec->healing); +-- +1.8.3.1 + diff --git a/SOURCES/0176-features-shard-Fix-crash-during-background-shard-del.patch b/SOURCES/0176-features-shard-Fix-crash-during-background-shard-del.patch new file mode 100644 index 0000000..487d8e2 --- /dev/null +++ b/SOURCES/0176-features-shard-Fix-crash-during-background-shard-del.patch @@ -0,0 +1,289 @@ +From 40ac42501d6bbff7206e753e8e988beefe74f5f4 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Fri, 5 Apr 2019 10:30:23 +0530 +Subject: [PATCH 176/178] features/shard: Fix crash during background shard + deletion in a specific case + +Consider the following case - +1. A file gets FALLOCATE'd such that > "shard-lru-limit" number of + shards are created. +2. And then it is deleted after that. + +The unique thing about FALLOCATE is that unlike WRITE, all of the +participant shards are resolved and created and fallocated in a single +batch. This means, in this case, after the first "shard-lru-limit" +number of shards are resolved and added to lru list, as part of +resolution of the remaining shards, some of the existing shards in lru +list will need to be evicted. So these evicted shards will be +inode_unlink()d as part of eviction. Now once the fop gets to the actual +FALLOCATE stage, the lru'd-out shards get added to fsync list. + +2 things to note at this point: +i. the lru'd out shards are only part of fsync list, so each holds 1 ref + on base shard +ii. and the more recently used shards are part of both fsync and lru list. + So each of these shards holds 2 refs on base inode - one for being + part of fsync list, and the other for being part of lru list. + +FALLOCATE completes successfully and then this very file is deleted, and +background shard deletion launched. Here's where the ref counts get mismatched. +First as part of inode_resolve()s during the deletion, the lru'd-out inodes +return NULL, because they are inode_unlink()'d by now. So these inodes need to +be freshly looked up. But as part of linking them in lookup_cbk (precisely in +shard_link_block_inode()), inode_link() returns the lru'd-out inode object. +And its inode ctx is still valid and ctx->base_inode valid from the last +time it was added to list. + +But shard_common_lookup_shards_cbk() passes NULL in the place of base_pointer +to __shard_update_shards_inode_list(). This means, as part of adding the lru'd out +inode back to lru list, base inode is not ref'd since its NULL. 
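A condensed sketch of the asymmetry described above, with both sides shown
together; the code shapes are taken from the hunks in this patch and the
next one (ctx is the shard inode ctx):

    /* add-to-lru side: the ref on the base inode is skipped when it is NULL */
    if (base_inode)
        ctx->base_inode = inode_ref(base_inode);
    list_add_tail(&ctx->ilist, &priv->ilist_head);

    /* unlink side: the unref is taken unconditionally for shards found on
     * the lru list, so every skipped ref above is one unref too many */
    inode_unref(ctx->base_inode);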
+ +Whereas post unlinking this shard, during shard_unlink_block_inode(), +ctx->base_inode is accessible and is unref'd because the shard was found to be part +of LRU list, although the matching ref didn't occur. This at some point leads to +base_inode refcount becoming 0 and it getting destroyed and released back while some +of its associated shards are continuing to be unlinked in parallel and the client crashes +whenever it is accessed next. + +Fix is to pass base shard correctly, if available, in shard_link_block_inode(). + +Also, the patch fixes the ret value check in tests/bugs/shard/shard-fallocate.c + +>Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f +>Updates: bz#1696136 +>Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22507/ + +BUG: 1694595 +Change-Id: Ibd0bc4c6952367608e10701473cbad3947d7559f +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172856 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/bugs/shard/bug-1696136.c | 121 +++++++++++++++++++++++++++++++++++++ + tests/bugs/shard/bug-1696136.t | 33 ++++++++++ + tests/bugs/shard/shard-fallocate.c | 2 +- + xlators/features/shard/src/shard.c | 12 +++- + 4 files changed, 164 insertions(+), 4 deletions(-) + create mode 100644 tests/bugs/shard/bug-1696136.c + create mode 100644 tests/bugs/shard/bug-1696136.t + +diff --git a/tests/bugs/shard/bug-1696136.c b/tests/bugs/shard/bug-1696136.c +new file mode 100644 +index 0000000..b9e8d13 +--- /dev/null ++++ b/tests/bugs/shard/bug-1696136.c +@@ -0,0 +1,121 @@ ++#define _GNU_SOURCE ++#include <fcntl.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <glusterfs/api/glfs.h> ++#include <glusterfs/api/glfs-handles.h> ++ ++enum fallocate_flag { ++ TEST_FALLOCATE_NONE, ++ TEST_FALLOCATE_KEEP_SIZE, ++ TEST_FALLOCATE_ZERO_RANGE, ++ TEST_FALLOCATE_PUNCH_HOLE, ++ TEST_FALLOCATE_MAX, ++}; ++ ++int ++get_fallocate_flag(int opcode) ++{ ++ int ret = 0; ++ ++ switch (opcode) { ++ case TEST_FALLOCATE_NONE: ++ ret = 0; ++ break; ++ case TEST_FALLOCATE_KEEP_SIZE: ++ ret = FALLOC_FL_KEEP_SIZE; ++ break; ++ case TEST_FALLOCATE_ZERO_RANGE: ++ ret = FALLOC_FL_ZERO_RANGE; ++ break; ++ case TEST_FALLOCATE_PUNCH_HOLE: ++ ret = FALLOC_FL_PUNCH_HOLE; ++ break; ++ default: ++ ret = -1; ++ break; ++ } ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret = 1; ++ int opcode = -1; ++ off_t offset = 0; ++ size_t len = 0; ++ glfs_t *fs = NULL; ++ glfs_fd_t *fd = NULL; ++ ++ if (argc != 8) { ++ fprintf(stderr, ++ "Syntax: %s <host> <volname> <opcode> <offset> <len> " ++ "<file-path> <log-file>\n", ++ argv[0]); ++ return 1; ++ } ++ ++ fs = glfs_new(argv[2]); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = glfs_set_logging(fs, argv[7], 7); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_init: returned %d\n", ret); ++ goto out; ++ } ++ ++ opcode = atoi(argv[3]); ++ opcode = get_fallocate_flag(opcode); ++ if (opcode < 0) { ++ fprintf(stderr, "get_fallocate_flag: invalid flag \n"); ++ goto out; ++ } ++ ++ offset = atoi(argv[4]); ++ len = atoi(argv[5]); ++ ++ fd = glfs_open(fs, 
argv[6], O_RDWR); ++ if (fd == NULL) { ++ fprintf(stderr, "glfs_open: returned NULL\n"); ++ goto out; ++ } ++ ++ ret = glfs_fallocate(fd, opcode, offset, len); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_fallocate: returned %d\n", ret); ++ goto out; ++ } ++ ++ ret = glfs_unlink(fs, argv[6]); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_unlink: returned %d\n", ret); ++ goto out; ++ } ++ /* Sleep for 3s to give enough time for background deletion to complete ++ * during which if the bug exists, the process will crash. ++ */ ++ sleep(3); ++ ret = 0; ++ ++out: ++ if (fd) ++ glfs_close(fd); ++ glfs_fini(fs); ++ return ret; ++} +diff --git a/tests/bugs/shard/bug-1696136.t b/tests/bugs/shard/bug-1696136.t +new file mode 100644 +index 0000000..b6dc858 +--- /dev/null ++++ b/tests/bugs/shard/bug-1696136.t +@@ -0,0 +1,33 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 120 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2 ++ ++# Create a file ++TEST touch $M0/file1 ++ ++# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit ++TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++rm -f $(dirname $0)/bug-1696136 ++ ++cleanup +diff --git a/tests/bugs/shard/shard-fallocate.c b/tests/bugs/shard/shard-fallocate.c +index 3a784d3..45b9ce0 100644 +--- a/tests/bugs/shard/shard-fallocate.c ++++ b/tests/bugs/shard/shard-fallocate.c +@@ -97,7 +97,7 @@ main(int argc, char *argv[]) + } + + ret = glfs_fallocate(fd, opcode, offset, len); +- if (ret <= 0) { ++ if (ret < 0) { + fprintf(stderr, "glfs_fallocate: returned %d\n", ret); + goto out; + } +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index fa3564a..3c4bcdc 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -2213,13 +2213,19 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + xlator_t *this = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; + + this = THIS; + priv = this->private; +- if (local->loc.inode) ++ if (local->loc.inode) { + gf_uuid_copy(gfid, local->loc.inode->gfid); +- else ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { + gf_uuid_copy(gfid, local->base_gfid); ++ } + + shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); + +@@ -2232,7 +2238,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list( +- linked_inode, this, local->loc.inode, block_num, gfid); ++ linked_inode, this, base_inode, block_num, gfid); + } + UNLOCK(&priv->lock); + if (fsync_inode) +-- +1.8.3.1 + diff --git 
a/SOURCES/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch b/SOURCES/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch new file mode 100644 index 0000000..0156324 --- /dev/null +++ b/SOURCES/0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch @@ -0,0 +1,161 @@ +From 4f0aa008ed393d7ce222c4ea4bd0fa6ed52b48f6 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Fri, 5 Apr 2019 12:29:23 +0530 +Subject: [PATCH 177/178] features/shard: Fix extra unref when inode object is + lru'd out and added back + +Long tale of double unref! But do read... + +In cases where a shard base inode is evicted from lru list while still +being part of fsync list but added back soon before its unlink, there +could be an extra inode_unref() leading to premature inode destruction +leading to crash. + +One such specific case is the following - + +Consider features.shard-deletion-rate = features.shard-lru-limit = 2. +This is an oversimplified example but explains the problem clearly. + +First, a file is FALLOCATE'd to a size so that number of shards under +/.shard = 3 > lru-limit. +Shards 1, 2 and 3 need to be resolved. 1 and 2 are resolved first. +Resultant lru list: + 1 -----> 2 +refs on base inode - (1) + (1) = 2 +3 needs to be resolved. So 1 is lru'd out. Resultant lru list - + 2 -----> 3 +refs on base inode - (1) + (1) = 2 + +Note that 1 is inode_unlink()d but not destroyed because there are +non-zero refs on it since it is still participating in this ongoing +FALLOCATE operation. + +FALLOCATE is sent on all participant shards. In the cbk, all of them are +added to fync_list. +Resulting fsync list - + 1 -----> 2 -----> 3 (order doesn't matter) +refs on base inode - (1) + (1) + (1) = 3 +Total refs = 3 + 2 = 5 + +Now an attempt is made to unlink this file. Background deletion is triggered. +The first $shard-deletion-rate shards need to be unlinked in the first batch. +So shards 1 and 2 need to be resolved. inode_resolve fails on 1 but succeeds +on 2 and so it's moved to tail of list. +lru list now - + 3 -----> 2 +No change in refs. + +shard 1 is looked up. In lookup_cbk, it's linked and added back to lru list +at the cost of evicting shard 3. +lru list now - + 2 -----> 1 +refs on base inode: (1) + (1) = 2 +fsync list now - + 1 -----> 2 (again order doesn't matter) +refs on base inode - (1) + (1) = 2 +Total refs = 2 + 2 = 4 +After eviction, it is found 3 needs fsync. So fsync is wound, yet to be ack'd. +So it is still inode_link()d. + +Now deletion of shards 1 and 2 completes. lru list is empty. Base inode unref'd and +destroyed. +In the next batched deletion, 3 needs to be deleted. It is inode_resolve()able. +It is added back to lru list but base inode passed to __shard_update_shards_inode_list() +is NULL since the inode is destroyed. But its ctx->inode still contains base inode ptr +from first addition to lru list for no additional ref on it. +lru list now - + 3 +refs on base inode - (0) +Total refs on base inode = 0 +Unlink is sent on 3. It completes. Now since the ctx contains ptr to base_inode and the +shard is part of lru list, base shard is unref'd leading to a crash. + +FIX: +When shard is readded back to lru list, copy the base inode pointer as is into its inode ctx, +even if it is NULL. This is needed to prevent double unrefs at the time of deleting it. 
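In code, the fix is simply to make the ref unconditional so that it always
mirrors the unconditional unref taken at delete time; inode_ref() is
NULL-safe (it returns NULL for a NULL input), so storing the result of
inode_ref(NULL) costs nothing. The before/after shape of the hunks below:

    /* before: ref taken only for a non-NULL base inode */
    if (base_inode)
        ctx->base_inode = inode_ref(base_inode);

    /* after: whatever pointer was seen at ref time, NULL or not,
     * is exactly what gets unref'd when the shard is deleted */
    ctx->base_inode = inode_ref(base_inode);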
+ +Upstream patch: +> BUG: 1696136 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22517 +> Change-Id: I99a44039da2e10a1aad183e84f644d63ca552462 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Change-Id: I99a44039da2e10a1aad183e84f644d63ca552462 +Updates: bz#1694595 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172803 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + .../bug-1696136-lru-limit-equals-deletion-rate.t | 34 ++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 6 ++-- + 2 files changed, 36 insertions(+), 4 deletions(-) + create mode 100644 tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t + +diff --git a/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t +new file mode 100644 +index 0000000..3e4a65a +--- /dev/null ++++ b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t +@@ -0,0 +1,34 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 features.shard-lru-limit 120 ++TEST $CLI volume set $V0 features.shard-deletion-rate 120 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2 ++ ++# Create a file ++TEST touch $M0/file1 ++ ++# Fallocate a 500M file. 
This will make sure number of participant shards are > lru-limit ++TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++rm -f $(dirname $0)/bug-1696136 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 3c4bcdc..c1799ad 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -689,8 +689,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + ctx->block_num = block_num; + list_add_tail(&ctx->ilist, &priv->ilist_head); + priv->inode_count++; +- if (base_inode) +- ctx->base_inode = inode_ref(base_inode); ++ ctx->base_inode = inode_ref(base_inode); + } else { + /*If on the other hand there is no available slot for this inode + * in the list, delete the lru inode from the head of the list, +@@ -765,8 +764,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; +- if (base_inode) +- ctx->base_inode = inode_ref(base_inode); ++ ctx->base_inode = inode_ref(base_inode); + list_add_tail(&ctx->ilist, &priv->ilist_head); + } + } else { +-- +1.8.3.1 + diff --git a/SOURCES/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch b/SOURCES/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch new file mode 100644 index 0000000..1ff767d --- /dev/null +++ b/SOURCES/0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch @@ -0,0 +1,287 @@ +From 307074330db6e9f14941dfbabbe6f299cf841533 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Mon, 10 Jun 2019 23:58:16 +0530 +Subject: [PATCH 178/178] Cluster/afr: Don't treat all bricks having metadata + pending as split-brain + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22831/ + +Problem: +We currently don't have a roll-back/undoing of post-ops if quorum is not met. +Though the FOP is still unwound with failure, the xattrs remain on the disk. +Due to these partial post-ops and partial heals (healing only when 2 bricks +are up), we can end up in metadata split-brain purely from the afr xattrs +point of view i.e each brick is blamed by atleast one of the others for +metadata. These scenarios are hit when there is frequent connect/disconnect +of the client/shd to the bricks. + +Fix: +Pick a source based on the xattr values. If 2 bricks blame one, the blamed +one must be treated as sink. If there is no majority, all are sources. Once +we pick a source, self-heal will then do the heal instead of erroring out +due to split-brain. +This patch also adds restriction of all the bricks to be up to perform +metadata heal to avoid any metadata loss. + +Removed the test case tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t +as it was doing metadata heal even when only 2 of 3 bricks were up. 
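A toy illustration of the majority rule described above (not the actual afr
code): count the bricks that blame each candidate; a brick blamed by a
majority becomes a sink, and when no brick reaches a majority, all of them
remain sources:

    for (i = 0; i < child_count; i++) {
        int accusers = 0;
        for (j = 0; j < child_count; j++)
            if (j != i && blames[j][i])
                accusers++;
        /* e.g. blamed by 2 of 3: sink; cyclic blame (1 accuser each): no sinks */
        sinks[i] = (accusers > child_count / 2);
    }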
+ +Change-Id: I02064ecb7d68d498f75a353af64f75249a633508 +fixes: bz#1715438 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172935 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bug-1468279-source-not-blaming-sinks.t | 64 ---------- + .../bug-1717819-metadata-split-brain-detection.t | 130 +++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-common.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 +- + 4 files changed, 133 insertions(+), 67 deletions(-) + delete mode 100644 tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t + create mode 100644 tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t + +diff --git a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t b/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t +deleted file mode 100644 +index 054a4ad..0000000 +--- a/tests/bugs/replicate/bug-1468279-source-not-blaming-sinks.t ++++ /dev/null +@@ -1,64 +0,0 @@ +-#!/bin/bash +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +-TEST $CLI volume set $V0 cluster.metadata-self-heal off +-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; +-TEST touch $M0/file +- +-# Kill B1, create a pending metadata heal. +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST setfattr -n user.xattr -v value1 $M0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file +- +-# Kill B2, heal from B3 to B1. +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-TEST kill_brick $V0 $H0 $B0/${V0}1 +-TEST $CLI volume set $V0 cluster.self-heal-daemon on +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-$CLI volume heal $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-0 "metadata" +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +- +-# Create another pending metadata heal. +-TEST setfattr -n user.xattr -v value2 $M0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file +- +-# Kill B1, heal from B3 to B2 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST $CLI volume set $V0 cluster.self-heal-daemon on +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-$CLI volume heal $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 "metadata" +-TEST $CLI volume set $V0 cluster.self-heal-daemon off +- +-# ALL bricks up again. +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +-# B1 and B2 blame each other, B3 doesn't blame anyone. 
+-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/file +-EXPECT "0000000000000010000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/file +-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file +-EXPECT "0000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file +-TEST $CLI volume set $V0 cluster.self-heal-daemon on +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +-TEST $CLI volume heal $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +- +-cleanup; +diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t +new file mode 100644 +index 0000000..94b8bf3 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t +@@ -0,0 +1,130 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $CLI volume heal $V0 disable ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++############################################################################### ++# Case of 2 bricks blaming the third and the third blaming the other two. ++ ++TEST mkdir $M0/dir ++ ++# B0 and B2 must blame B1 ++TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST setfattr -n user.metadata -v 1 $M0/dir ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata ++CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir) ++ ++# B1 must blame B0 and B2 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir ++ ++# Launch heal ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir) ++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir) ++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir) ++ ++TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ] ++TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ] ++TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ] ++TEST setfattr -x user.metadata $M0/dir ++ ++############################################################################### ++# Case of each brick blaming the next one in a cyclic manner ++ ++TEST $CLI volume heal $V0 disable ++TEST `echo "hello" >> $M0/dir/file` ++# Mark cyclic xattrs and modify metadata directly on the bricks. 
++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file ++ ++setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file ++setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file ++setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) ++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) ++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file) ++ ++TEST [ "$B0_XATTR" == "$B1_XATTR" ] ++TEST [ "$B0_XATTR" == "$B2_XATTR" ] ++TEST rm -f $M0/dir/file ++ ++############################################################################### ++# Case of 2 bricks having quorum blaming and the other having only one blaming. ++ ++TEST $CLI volume heal $V0 disable ++TEST `echo "hello" >> $M0/dir/file` ++# B0 and B2 must blame B1 ++TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST setfattr -n user.metadata -v 1 $M0/dir/file ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata ++ ++# B1 must blame B0 and B2 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file ++ ++# B0 must blame B2 ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file ++ ++# Modify the metadata directly on the bricks B1 & B2. 
++setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file ++setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file ++ ++# Launch heal ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) ++B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) ++B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file) ++ ++TEST [ "$B0_XATTR" == "$B1_XATTR" ] ++TEST [ "$B0_XATTR" == "$B2_XATTR" ] ++ ++############################################################################### ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 595bed4..5157e7d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1590,7 +1590,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION) { ++ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { + min_participants = priv->child_count; + } else { + min_participants = AFR_SH_MIN_PARTICIPANTS; +@@ -1656,7 +1656,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION) ++ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) + afr_selfheal_post_op_failure_accounting(priv, accused, sources, + locked_on); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index ba43341..ecfa791 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -398,7 +398,7 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, + data_lock); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + ret = -ENOTCONN; + goto unlock; + } +-- +1.8.3.1 + diff --git a/SOURCES/0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch b/SOURCES/0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch new file mode 100644 index 0000000..c9df885 --- /dev/null +++ b/SOURCES/0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch @@ -0,0 +1,72 @@ +From fe1d641e4666f9a20f656b1799cf6e7b75af1279 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Tue, 11 Jun 2019 11:31:02 +0530 +Subject: [PATCH 179/192] tests: Fix split-brain-favorite-child-policy.t + failure + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22850/ + +Problem: +The test case is failing to heal the volume within $HEAL_TIMEOUT @195. +This is happening because as part of split-brain resolution the file +gets expunged from the sink and the new entry mark for that file will +be done on the source bricks as part of impunging. Since the source +bricks shd-threads failed to get the heal-domain lock, they will wait +for the heal-timeout of 10 minutes, which is greater than $HEAL_TIMEOUT. 
+ +Fix: +Set the cluster.heal-timeout to 5 seconds to trigger the heal so that +one of the source brick heals the file within the $HEAL_TIMEOUT. + +Change-Id: Iae5e819aa564ccde6639c51711f49d1152260c2d +updates: bz#1704562 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172965 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/afr/split-brain-favorite-child-policy.t | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t +index 0e321c6..c268c12 100644 +--- a/tests/basic/afr/split-brain-favorite-child-policy.t ++++ b/tests/basic/afr/split-brain-favorite-child-policy.t +@@ -16,6 +16,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off + TEST $CLI volume set $V0 cluster.entry-self-heal off + TEST $CLI volume set $V0 cluster.data-self-heal off + TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 + TEST $CLI volume start $V0 + TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + TEST touch $M0/file +@@ -38,7 +39,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + TEST $CLI volume heal $V0 + +-#file fill in split-brain ++#file still in split-brain + cat $M0/file > /dev/null + EXPECT "1" echo $? + +@@ -124,7 +125,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + TEST $CLI volume heal $V0 + +-#file fill in split-brain ++#file still in split-brain + cat $M0/file > /dev/null + EXPECT "1" echo $? + +@@ -179,7 +180,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + TEST $CLI volume heal $V0 + +-#file fill in split-brain ++#file still in split-brain + cat $M0/file > /dev/null + EXPECT "1" echo $? + +-- +1.8.3.1 + diff --git a/SOURCES/0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch b/SOURCES/0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch new file mode 100644 index 0000000..7bdc9f2 --- /dev/null +++ b/SOURCES/0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch @@ -0,0 +1,44 @@ +From 30b6d3452df0ef6621592a786f0c4347e09aa8f2 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Tue, 11 Jun 2019 12:00:25 +0530 +Subject: [PATCH 180/192] ganesha/scripts : Make generate-epoch.py python3 + compatible + +This would help in building RHEL8 glusterfs server build. We don't need +to validate this fix as such given RHEL8 glusterfs server support at +RHGS 3.5.0 is an internal milestone. 
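The incompatibility being fixed is that Python 3 bytes objects no longer
accept the 'hex' codec through .encode(); binascii.hexlify() works on both
interpreters. A quick illustration (the input value is illustrative):

    import binascii
    uuid_bin = b"\xde\xad\xbe\xef"
    # Python 2 only:   uuid_bin.encode('hex')      -> 'deadbeef'
    # Python 2 and 3:  binascii.hexlify(uuid_bin)  -> b'deadbeef'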
+ +Label : DOWNSTREAM ONLY + +Change-Id: I738219613680406de5c86a452446035c72a52bc4 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172974 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + extras/ganesha/scripts/generate-epoch.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py +index 5db5e56..61ccda9 100755 +--- a/extras/ganesha/scripts/generate-epoch.py ++++ b/extras/ganesha/scripts/generate-epoch.py +@@ -36,13 +36,13 @@ def epoch_uuid(): + + uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-","")) + +- epoch_uuid = int(uuid_bin.encode('hex'), 32) & 0xFFFF0000 ++ epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000 + return epoch_uuid + + # Construct epoch as follows - + # first 32-bit contains the now() time + # rest 32-bit value contains the local glusterd node uuid + epoch = (epoch_now() | epoch_uuid()) +-print str(epoch) ++print(str(epoch)) + + exit(0) +-- +1.8.3.1 + diff --git a/SOURCES/0181-afr-log-before-attempting-data-self-heal.patch b/SOURCES/0181-afr-log-before-attempting-data-self-heal.patch new file mode 100644 index 0000000..955441d --- /dev/null +++ b/SOURCES/0181-afr-log-before-attempting-data-self-heal.patch @@ -0,0 +1,59 @@ +From 99f86ae7d45667d86b1b6f9f9540ec2889c6c4ce Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Wed, 8 May 2019 04:51:27 -0400 +Subject: [PATCH 181/192] afr: log before attempting data self-heal. + +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22685/ + +I was working on a blog about troubleshooting AFR issues and I wanted to copy +the messages logged by self-heal for my blog. I then realized that AFR-v2 is not +logging *before* attempting data heal while it logs it for metadata and entry +heals. + +I [MSGID: 108026] [afr-self-heal-entry.c:883:afr_selfheal_entry_do] +0-testvol-replicate-0: performing entry selfheal on +d120c0cf-6e87-454b-965b-0d83a4c752bb +I [MSGID: 108026] [afr-self-heal-common.c:1741:afr_log_selfheal] +0-testvol-replicate-0: Completed entry selfheal on +d120c0cf-6e87-454b-965b-0d83a4c752bb. sources=[0] 2 sinks=1 +I [MSGID: 108026] [afr-self-heal-common.c:1741:afr_log_selfheal] +0-testvol-replicate-0: Completed data selfheal on +a9b5f183-21eb-4fb3-a342-287d3a7dddc5. sources=[0] 2 sinks=1 +I [MSGID: 108026] [afr-self-heal-metadata.c:52:__afr_selfheal_metadata_do] +0-testvol-replicate-0: performing metadata selfheal on +a9b5f183-21eb-4fb3-a342-287d3a7dddc5 +I [MSGID: 108026] [afr-self-heal-common.c:1741:afr_log_selfheal] +0-testvol-replicate-0: Completed metadata selfheal on +a9b5f183-21eb-4fb3-a342-287d3a7dddc5. sources=[0] 2 sinks=1 + +Adding it in this patch. Now there is a 'performing' and a corresponding +'Completed' message for every type of heal. 
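With the gf_msg() added below, the data-heal path should now log a line of
roughly this shape before healing starts (illustrative, assembled from the
format string in the hunk rather than captured output):

    I [MSGID: 108026] [afr-self-heal-data.c:...:afr_selfheal_data_do] 0-testvol-replicate-0: performing data selfheal on a9b5f183-21eb-4fb3-a342-287d3a7dddc5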
+ +BUG: 1710701 +Change-Id: I91e29dd05af1c78dbc447d1a02dda32b03d64aef +fixes: bz#1710701 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173108 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/afr/src/afr-self-heal-data.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index 18a0334..cdff4a5 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -324,6 +324,9 @@ afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + call_frame_t *iter_frame = NULL; + unsigned char arbiter_sink_status = 0; + ++ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO, ++ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid)); ++ + priv = this->private; + if (priv->arbiter_count) { + arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX]; +-- +1.8.3.1 + diff --git a/SOURCES/0182-geo-rep-fix-mountbroker-setup.patch b/SOURCES/0182-geo-rep-fix-mountbroker-setup.patch new file mode 100644 index 0000000..64dd617 --- /dev/null +++ b/SOURCES/0182-geo-rep-fix-mountbroker-setup.patch @@ -0,0 +1,55 @@ +From 37df54966d5b7f01ad24d329bac5da1cf17f2abe Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Wed, 12 Jun 2019 16:10:52 +0530 +Subject: [PATCH 182/192] geo-rep : fix mountbroker setup + +Problem: + +Unable to setup mountbroker root directory while creating geo-replication +session for non-root user. + +Casue: +With patch[1] which defines the max-port for glusterd one extra sapce +got added in field of 'option max-port'. +[1]. https://review.gluster.org/#/c/glusterfs/+/21872/ + +In geo-rep spliting of key-value pair form vol file was done on the +basis of space so this additional space caused "ValueError: too many values +to unpack". + +Solution: +Use split so that it can treat consecutive whitespace as a single separator. 
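The difference is easy to see in isolation; with the doubled space, the
indexing in peer_mountbroker yields three values for the two-variable
unpack (the option line below is illustrative):

    >>> "option  max-port 60999".split(" ")
    ['option', '', 'max-port', '60999']    # [1:] has 3 items -> ValueError
    >>> "option  max-port 60999".split()
    ['option', 'max-port', '60999']        # [1:] unpacks cleanly into key, value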
+ +Backport of: + + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22716/ + >Fixes: bz#1709248 + >Change-Id: Ia22070a43f95d66d84cb35487f23f9ee58b68c73 + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1708043 +Change-Id: Ic6d535a6faad62ce185c6aa5adc18f5fdf8f27be +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173149 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/src/peer_mountbroker.py.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in +index 54f95c4..ce33f97 100644 +--- a/geo-replication/src/peer_mountbroker.py.in ++++ b/geo-replication/src/peer_mountbroker.py.in +@@ -47,7 +47,7 @@ class MountbrokerUserMgmt(object): + for line in f: + line = line.strip() + if line.startswith("option "): +- key, value = line.split(" ")[1:] ++ key, value = line.split()[1:] + self._options[key] = value + if line.startswith("#"): + self.commented_lines.append(line) +-- +1.8.3.1 + diff --git a/SOURCES/0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch b/SOURCES/0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch new file mode 100644 index 0000000..0cd6092 --- /dev/null +++ b/SOURCES/0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch @@ -0,0 +1,47 @@ +From fe9159ee42f0f67b01e6a495df8105ea0f66738d Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 30 May 2019 23:48:05 +0530 +Subject: [PATCH 183/192] glusterd/svc: Stop stale process using the + glusterd_proc_stop + +While restarting a glusterd process, when we have a stale pid +we were doing a simple kill. Instead we can use glusterd_proc_stop +Because it has more logging plus force kill in case if there is +any problem with kill signal handling. + +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22791/ + +>Change-Id: I4a2dadc210a7a65762dd714e809899510622b7ec +>updates: bz#1710054 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I3327528d8ebf90bbb2221265a0cf059c9359f141 +BUG: 1720248 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172290 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 6a3ca52..a6e662f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -488,9 +488,9 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + + if (!mux_proc) { + if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { +- /* stale pid file, unlink it. 
*/ +- kill(pid, SIGTERM); +- sys_unlink(svc->proc.pidfile); ++ /* stale pid file, stop and unlink it */ ++ glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE); ++ glusterd_unlink_file(svc->proc.pidfile); + } + mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); + } +-- +1.8.3.1 + diff --git a/SOURCES/0184-tests-Add-gating-configuration-file-for-rhel8.patch b/SOURCES/0184-tests-Add-gating-configuration-file-for-rhel8.patch new file mode 100644 index 0000000..9c385b5 --- /dev/null +++ b/SOURCES/0184-tests-Add-gating-configuration-file-for-rhel8.patch @@ -0,0 +1,38 @@ +From c6fdb740675999883a8a7942fbcd32f9889dc739 Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Thu, 13 Jun 2019 21:58:43 +0530 +Subject: [PATCH 184/192] tests: Add gating configuration file for rhel8 + +Adding configuration files to enable automatic execution +of gating CI for glusterfs. + +Label: DOWNSTREAM ONLY + +BUG: 1720318 +Change-Id: I8b42792d93d1eea455f86acd1576c20e12eed9f0 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173412 +Reviewed-by: Vivek Das <vdas@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + gating.yaml | 7 +++++++ + 1 file changed, 7 insertions(+) + create mode 100644 gating.yaml + +diff --git a/gating.yaml b/gating.yaml +new file mode 100644 +index 0000000..eeab6e9 +--- /dev/null ++++ b/gating.yaml +@@ -0,0 +1,7 @@ ++--- !Policy ++product_versions: ++ - rhel-8 ++decision_context: osci_compose_gate_modules ++subject_type: redhat-module ++rules: ++ - !PassingTestCaseRule {test_case_name: manual.sst_rh_gluster_storage.glusterfs.bvt} +-- +1.8.3.1 + diff --git a/SOURCES/0185-gfapi-provide-an-api-for-setting-statedump-path.patch b/SOURCES/0185-gfapi-provide-an-api-for-setting-statedump-path.patch new file mode 100644 index 0000000..8e93247 --- /dev/null +++ b/SOURCES/0185-gfapi-provide-an-api-for-setting-statedump-path.patch @@ -0,0 +1,174 @@ +From 462e3988936761317975fd811dd355b81328b60a Mon Sep 17 00:00:00 2001 +From: Amar Tumballi <amarts@redhat.com> +Date: Thu, 14 Mar 2019 10:04:28 +0530 +Subject: [PATCH 185/192] gfapi: provide an api for setting statedump path + +Currently for an application using glfsapi to use glusterfs, when a +statedump is taken, it uses /var/run/gluster dir to dump info. + +There can be concerns as this directory may be owned by some other +user, and hence it may fail taking statedump. Such applications +should have an option to use different path. + +This patch provides an API to do so. 
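+
+A minimal caller sketch, assuming a volume named "myvol" reachable on
+"server1" over the default management port; error handling is trimmed:
+
+    #include <stdio.h>
+    #include <errno.h>
+    #include <string.h>
+    #include <glusterfs/api/glfs.h>
+
+    int main(void)
+    {
+        glfs_t *fs = glfs_new("myvol");
+        glfs_set_volfile_server(fs, "tcp", "server1", 24007);
+        /* send this client's statedumps to a directory it can
+         * write to, instead of the default /var/run/gluster */
+        if (glfs_set_statedump_path(fs, "/tmp") != 0)
+            fprintf(stderr, "statedump path: %s\n", strerror(errno));
+        if (glfs_init(fs) == 0) {
+            /* ... use the mount, trigger statedumps as needed ... */
+            glfs_fini(fs);
+        }
+        return 0;
+    }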
+ +Upstream details: +> Updates: bz#1689097 +> Change-Id: I8918e002bc823d83614c972b6c738baa04681b23 +> URL: https://review.gluster.org/22364 + +BUG: 1720461 +Change-Id: I6079c8d799f35eaf76e62d259b51573bf561ba5b +Signed-off-by: Amar Tumballi <amarts@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173451 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + api/src/gfapi.aliases | 2 ++ + api/src/gfapi.map | 5 ++++ + api/src/glfs.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++ + api/src/glfs.h | 28 +++++++++++++++++++++++ + 4 files changed, 98 insertions(+) + +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 09c0fd8..8fdf734 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -195,3 +195,5 @@ _pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_6.0 + _pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0 + _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0 + _pub_glfs_setattr _glfs_setattr$GFAPI_6.0 ++ ++_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_future +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index b97a614..cf118e8 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -271,3 +271,8 @@ GFAPI_PRIVATE_6.1 { + global: + glfs_setfspid; + } GFAPI_6.0; ++ ++GFAPI_future { ++ global: ++ glfs_set_statedump_path; ++} GFAPI_PRIVATE_6.1; +diff --git a/api/src/glfs.c b/api/src/glfs.c +index f4a8e08..ba513e6 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -1212,6 +1212,7 @@ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx) + glusterfs_graph_destroy_residual(trav_graph); + } + ++ GF_FREE(ctx->statedump_path); + FREE(ctx); + + return ret; +@@ -1738,3 +1739,65 @@ invalid_fs: + } + + GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0); ++ ++int ++pub_glfs_set_statedump_path(struct glfs *fs, const char *path) ++{ ++ struct stat st; ++ int ret; ++ DECLARE_OLD_THIS; ++ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs); ++ ++ if (!path) { ++ gf_log("glfs", GF_LOG_ERROR, "path is NULL"); ++ errno = EINVAL; ++ goto err; ++ } ++ ++ /* If path is not present OR, if it is directory AND has enough permission ++ * to create files, then proceed */ ++ ret = sys_stat(path, &st); ++ if (ret && errno != ENOENT) { ++ gf_log("glfs", GF_LOG_ERROR, "%s: not a valid path (%s)", path, ++ strerror(errno)); ++ errno = EINVAL; ++ goto err; ++ } ++ ++ if (!ret) { ++ /* file is present, now check other things */ ++ if (!S_ISDIR(st.st_mode)) { ++ gf_log("glfs", GF_LOG_ERROR, "%s: path is not directory", path); ++ errno = EINVAL; ++ goto err; ++ } ++ if (sys_access(path, W_OK | X_OK) < 0) { ++ gf_log("glfs", GF_LOG_ERROR, ++ "%s: path doesn't have write permission", path); ++ errno = EPERM; ++ goto err; ++ } ++ } ++ ++ /* If set, it needs to be freed, so we don't have leak */ ++ GF_FREE(fs->ctx->statedump_path); ++ ++ fs->ctx->statedump_path = gf_strdup(path); ++ if (!fs->ctx->statedump_path) { ++ gf_log("glfs", GF_LOG_ERROR, ++ "%s: failed to set statedump path, no memory", path); ++ errno = ENOMEM; ++ goto err; ++ } ++ ++ __GLFS_EXIT_FS; ++ ++ return 0; ++err: ++ __GLFS_EXIT_FS; ++ ++invalid_fs: ++ return -1; ++} ++ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, future); +diff --git a/api/src/glfs.h b/api/src/glfs.h +index 6714782..a6c12e1 100644 +--- a/api/src/glfs.h ++++ b/api/src/glfs.h +@@ -1453,5 +1453,33 @@ int + glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat, + int follow) __THROW GFAPI_PUBLIC(glfs_setattr, 6.0); + ++/* ++ SYNOPSIS ++ ++ 
glfs_set_statedump_path: Function to set statedump path. ++ ++ DESCRIPTION ++ ++ This function is used to set statedump directory ++ ++ PARAMETERS ++ ++ @fs: The 'virtual mount' object to be configured with the volume ++ specification file. ++ ++ @path: statedump path. Should be a directory. But the API won't fail if the ++ directory doesn't exist yet, as one may create it later. ++ ++ RETURN VALUES ++ ++ 0 : Success. ++ -1 : Failure. @errno will be set with the type of failure. ++ ++ */ ++ ++int ++glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW ++ GFAPI_PUBLIC(glfs_set_statedump_path, future); ++ + __END_DECLS + #endif /* !_GLFS_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0186-cli-Remove-brick-warning-seems-unnecessary.patch b/SOURCES/0186-cli-Remove-brick-warning-seems-unnecessary.patch new file mode 100644 index 0000000..e81b015 --- /dev/null +++ b/SOURCES/0186-cli-Remove-brick-warning-seems-unnecessary.patch @@ -0,0 +1,57 @@ +From be925e84edcecd879e953bdb68c10f98825dba53 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Mon, 3 Jun 2019 18:05:24 +0530 +Subject: [PATCH 186/192] cli: Remove-brick warning seems unnecessary + +As force-migration option is disabled by default, +the warning seems unnessary. + +Rephrased the warning to make best sense out of it. + +>fixes: bz#1712668 +>Change-Id: Ia18c3c5e7b3fec808fce2194ca0504a837708822 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22805/ + +Bug: 1708183 +Change-Id: Ia18c3c5e7b3fec808fce2194ca0504a837708822 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173447 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-volume.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 564aef7..a42e663 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2090,14 +2090,15 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state, + " on the volume.\nDo you want to continue?"; + } else if (command == GF_OP_CMD_START) { + question = +- "Running remove-brick with cluster.force-migration" +- " enabled can result in data corruption. It is safer" +- " to disable this option so that files that receive " +- "writes during migration are not migrated.\nFiles " +- "that are not migrated can then be manually copied " +- "after the remove-brick commit operation.\nDo you " +- "want to continue with your current " +- "cluster.force-migration settings?"; ++ "It is recommended that remove-brick be run with" ++ " cluster.force-migration option disabled to prevent" ++ " possible data corruption. Doing so will ensure that" ++ " files that receive writes during migration will not" ++ " be migrated and will need to be manually copied" ++ " after the remove-brick commit operation. Please" ++ " check the value of the option and update accordingly." 
++ " \nDo you want to continue with your current" ++ " cluster.force-migration settings?"; + } + + if (!brick_count) { +-- +1.8.3.1 + diff --git a/SOURCES/0187-gfapi-statedump_path-add-proper-version-number.patch b/SOURCES/0187-gfapi-statedump_path-add-proper-version-number.patch new file mode 100644 index 0000000..f1df9c9 --- /dev/null +++ b/SOURCES/0187-gfapi-statedump_path-add-proper-version-number.patch @@ -0,0 +1,98 @@ +From a65982755b31fb548ff7a997ee754360a516da94 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi <amarts@redhat.com> +Date: Fri, 14 Jun 2019 13:58:25 +0530 +Subject: [PATCH 187/192] gfapi: statedump_path() add proper version number + +An API should have the proper version number, and 'future' version +number is just a place holder. One shouldn't be using it in the +release versions. + +With the previous backport of the patch, the version remained same +as that of 'master' branch, which is future, but as it is an API, +it needed a fixed version number. With this patch, corrected the same. + +Label: DOWNSTREAM_ONLY + +> In upstream, this is corrected by a backport to the stable version, 6.4 +> URL: https://review.gluster.org/22864 + +BUG: 1720461 +Change-Id: I939850689d47d4f240c9d43f6be1a11de29c4760 +Signed-off-by: Amar Tumballi <amarts@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173475 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + api/examples/glfsxmp.c | 5 +++++ + api/src/gfapi.aliases | 2 +- + api/src/gfapi.map | 2 +- + api/src/glfs.c | 2 +- + api/src/glfs.h | 2 +- + 5 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c +index 9d96eea..33f44df 100644 +--- a/api/examples/glfsxmp.c ++++ b/api/examples/glfsxmp.c +@@ -1573,6 +1573,11 @@ main(int argc, char *argv[]) + + ret = glfs_set_logging(fs2, "/dev/stderr", 7); + ++ ret = glfs_set_statedump_path(fs2, "/tmp"); ++ if (ret) { ++ fprintf(stderr, "glfs_set_statedump_path: %s\n", strerror(errno)); ++ } ++ + ret = glfs_init(fs2); + + fprintf(stderr, "glfs_init: returned %d\n", ret); +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 8fdf734..692ae13 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -196,4 +196,4 @@ _pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0 + _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0 + _pub_glfs_setattr _glfs_setattr$GFAPI_6.0 + +-_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_future ++_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_6.4 +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index cf118e8..df65837 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -272,7 +272,7 @@ GFAPI_PRIVATE_6.1 { + glfs_setfspid; + } GFAPI_6.0; + +-GFAPI_future { ++GFAPI_6.4 { + global: + glfs_set_statedump_path; + } GFAPI_PRIVATE_6.1; +diff --git a/api/src/glfs.c b/api/src/glfs.c +index ba513e6..6bbb620 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -1800,4 +1800,4 @@ invalid_fs: + return -1; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, future); ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 6.4); +diff --git a/api/src/glfs.h b/api/src/glfs.h +index a6c12e1..08b6ca0 100644 +--- a/api/src/glfs.h ++++ b/api/src/glfs.h +@@ -1479,7 +1479,7 @@ glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat, + + int + glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW +- 
GFAPI_PUBLIC(glfs_set_statedump_path, future); ++ GFAPI_PUBLIC(glfs_set_statedump_path, 6.4); + + __END_DECLS + #endif /* !_GLFS_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0188-features-shard-Fix-integer-overflow-in-block-count-a.patch b/SOURCES/0188-features-shard-Fix-integer-overflow-in-block-count-a.patch new file mode 100644 index 0000000..2360ceb --- /dev/null +++ b/SOURCES/0188-features-shard-Fix-integer-overflow-in-block-count-a.patch @@ -0,0 +1,38 @@ +From 7221352670a750e35268573dba36c139a5041b14 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Fri, 3 May 2019 10:50:40 +0530 +Subject: [PATCH 188/192] features/shard: Fix integer overflow in block count + accounting + +... by holding delta_blocks in 64-bit int as opposed to 32-bit int. + +> Upstream: https://review.gluster.org/22655 +> BUG: 1705884 +> Change-Id: I2c1ddab17457f45e27428575ad16fa678fd6c0eb + +Change-Id: I2c1ddab17457f45e27428575ad16fa678fd6c0eb +updates: bz#1668001 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173476 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/features/shard/src/shard.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 570fe46..cd6a663 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -275,7 +275,7 @@ typedef struct shard_local { + size_t req_size; + size_t readdir_size; + int64_t delta_size; +- int delta_blocks; ++ int64_t delta_blocks; + loc_t loc; + loc_t dot_shard_loc; + loc_t dot_shard_rm_loc; +-- +1.8.3.1 + diff --git a/SOURCES/0189-features-shard-Fix-block-count-accounting-upon-trunc.patch b/SOURCES/0189-features-shard-Fix-block-count-accounting-upon-trunc.patch new file mode 100644 index 0000000..bc07fad --- /dev/null +++ b/SOURCES/0189-features-shard-Fix-block-count-accounting-upon-trunc.patch @@ -0,0 +1,323 @@ +From 369c5772a722b6e346ec8b41f992112785366778 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Wed, 8 May 2019 13:00:51 +0530 +Subject: [PATCH 189/192] features/shard: Fix block-count accounting upon + truncate to lower size + + > Upstream: https://review.gluster.org/22681 + > BUG: 1705884 + > Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53 + +The way delta_blocks is computed in shard is incorrect, when a file +is truncated to a lower size. The accounting only considers change +in size of the last of the truncated shards. + +FIX: + +Get the block-count of each shard just before an unlink at posix in +xdata. Their summation plus the change in size of last shard +(from an actual truncate) is used to compute delta_blocks which is +used in the xattrop for size update. 
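+
+As a worked example, assuming the default 64MB shard size and 512-byte
+blocks: truncating a fully allocated 200MB file to zero unlinks shards
+1, 2 and 3, whose block counts (131072 + 131072 + 16384) are returned
+through the unlink xdata, and the truncate of the base shard adds the
+remaining -131072; the xattrop then applies the whole delta of -409600
+blocks, which is what the test added below verifies with stat -c %b.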
+ +Change-Id: I9128a192e9bf8c3c3a959e96b7400879d03d7c53 +updates: bz#1668001 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173477 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/glusterfs/glusterfs.h | 2 + + tests/bugs/shard/bug-1705884.t | 32 +++++++++++++++ + xlators/features/shard/src/shard.c | 60 +++++++++++++++++++++++------ + xlators/features/shard/src/shard.h | 2 +- + xlators/storage/posix/src/posix-entry-ops.c | 9 +++++ + 5 files changed, 92 insertions(+), 13 deletions(-) + create mode 100644 tests/bugs/shard/bug-1705884.t + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 516b497..9ec2365 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -328,6 +328,8 @@ enum gf_internal_fop_indicator { + #define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count" + #define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count" + ++#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count" ++ + #define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup" + + #define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect" +diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t +new file mode 100644 +index 0000000..f6e5037 +--- /dev/null ++++ b/tests/bugs/shard/bug-1705884.t +@@ -0,0 +1,32 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++require_fallocate -l 1m $M0/file ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST fallocate -l 200M $M0/foo ++EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo ++TEST truncate -s 0 $M0/foo ++EXPECT "0" stat -c %b $M0/foo ++TEST fallocate -l 100M $M0/foo ++EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index c1799ad..b248767 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1135,6 +1135,7 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, + { + int ret = -1; + int64_t *size_attr = NULL; ++ int64_t delta_blocks = 0; + inode_t *inode = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; +@@ -1156,13 +1157,13 @@ shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, + + /* If both size and block count have not changed, then skip the xattrop. 
+ */ +- if ((local->delta_size + local->hole_size == 0) && +- (local->delta_blocks == 0)) { ++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks); ++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { + goto out; + } + + ret = shard_set_size_attrs(local->delta_size + local->hole_size, +- local->delta_blocks, &size_attr); ++ delta_blocks, &size_attr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, + "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); +@@ -1947,6 +1948,7 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dict_t *xdata) + { + inode_t *inode = NULL; ++ int64_t delta_blocks = 0; + shard_local_t *local = NULL; + + local = frame->local; +@@ -1967,14 +1969,15 @@ shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + } + + local->postbuf.ia_size = local->offset; +- local->postbuf.ia_blocks -= (prebuf->ia_blocks - postbuf->ia_blocks); + /* Let the delta be negative. We want xattrop to do subtraction */ + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- local->delta_blocks = postbuf->ia_blocks - prebuf->ia_blocks; ++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, ++ postbuf->ia_blocks - prebuf->ia_blocks); ++ GF_ASSERT(delta_blocks <= 0); ++ local->postbuf.ia_blocks += delta_blocks; + local->hole_size = 0; + +- shard_inode_ctx_set(inode, this, postbuf, 0, SHARD_MASK_TIMES); +- ++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +@@ -2034,8 +2037,10 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) + { ++ int ret = 0; + int call_count = 0; + int shard_block_num = (long)cookie; ++ uint64_t block_count = 0; + shard_local_t *local = NULL; + + local = frame->local; +@@ -2045,6 +2050,16 @@ shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->op_errno = op_errno; + goto done; + } ++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); ++ if (!ret) { ++ GF_ATOMIC_SUB(local->delta_blocks, block_count); ++ } else { ++ /* dict_get failed possibly due to a heterogeneous cluster? 
*/ ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get key %s from dict during truncate of gfid %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ } + + shard_unlink_block_inode(local, shard_block_num); + done: +@@ -2074,6 +2089,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; ++ dict_t *xdata_req = NULL; + + local = frame->local; + priv = this->private; +@@ -2101,7 +2117,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + local->postbuf.ia_size = local->offset; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- local->delta_blocks = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); + local->hole_size = 0; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); +@@ -2110,6 +2126,21 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + + local->call_count = call_count; + i = 1; ++ xdata_req = dict_new(); ++ if (!xdata_req) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } ++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict during truncate of %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ dict_unref(xdata_req); ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + + SHARD_SET_ROOT_FS_ID(frame, local); + while (cur_block <= last_block) { +@@ -2148,7 +2179,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + + STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, + (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, 0, NULL); ++ FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); + loc_wipe(&loc); + next: + i++; +@@ -2156,6 +2187,7 @@ shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) + if (!--call_count) + break; + } ++ dict_unref(xdata_req); + return 0; + } + +@@ -2608,7 +2640,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) + */ + local->hole_size = local->offset - local->prebuf.ia_size; + local->delta_size = 0; +- local->delta_blocks = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); + local->postbuf.ia_size = local->offset; + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +@@ -2624,7 +2656,7 @@ shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) + */ + local->hole_size = 0; + local->delta_size = (local->offset - local->prebuf.ia_size); +- local->delta_blocks = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); +@@ -2680,6 +2712,7 @@ shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + if (!local->xattr_req) + goto err; + local->resolver_base_inode = loc->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_truncate_handler); +@@ -2735,6 +2768,7 @@ shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, 
fd->inode->gfid); + local->resolver_base_inode = fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_truncate_handler); +@@ -5295,7 +5329,8 @@ shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, + local->op_errno = op_errno; + } else { + local->written_size += op_ret; +- local->delta_blocks += (post->ia_blocks - pre->ia_blocks); ++ GF_ATOMIC_ADD(local->delta_blocks, ++ post->ia_blocks - pre->ia_blocks); + local->delta_size += (post->ia_size - pre->ia_size); + shard_inode_ctx_set(local->fd->inode, this, post, 0, + SHARD_MASK_TIMES); +@@ -6599,6 +6634,7 @@ shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, + local->fd = fd_ref(fd); + local->block_size = block_size; + local->resolver_base_inode = local->fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index cd6a663..04abd62 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -275,7 +275,7 @@ typedef struct shard_local { + size_t req_size; + size_t readdir_size; + int64_t delta_size; +- int64_t delta_blocks; ++ gf_atomic_t delta_blocks; + loc_t loc; + loc_t dot_shard_loc; + loc_t dot_shard_rm_loc; +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index b24a052..34ee2b8 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -1071,6 +1071,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; ++ int ret = -1; + struct iatt stbuf = { + 0, + }; +@@ -1235,6 +1236,14 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + goto out; + } + ++ if (xdata && dict_get(xdata, GF_GET_FILE_BLOCK_COUNT)) { ++ ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT, ++ stbuf.ia_blocks); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, ++ "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT); ++ } ++ + if (xdata && dict_get(xdata, GET_LINK_COUNT)) + get_link_count = _gf_true; + op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf, +-- +1.8.3.1 + diff --git a/SOURCES/0190-Build-removing-the-hardcoded-usage-of-python3.patch b/SOURCES/0190-Build-removing-the-hardcoded-usage-of-python3.patch new file mode 100644 index 0000000..25f01a8 --- /dev/null +++ b/SOURCES/0190-Build-removing-the-hardcoded-usage-of-python3.patch @@ -0,0 +1,49 @@ +From c7aae487213e464b2ee7a785d752bd8264ceb371 Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Thu, 13 Jun 2019 20:12:14 +0530 +Subject: [PATCH 190/192] Build: removing the hardcoded usage of python3 + +Label : DOWNSTREAM ONLY + +Problem: RHEL8 needed python3 so python3 was hardcoded to be used +in gluster build. python2 was still being used by RHEL7 machines and +when the shebang was redirected to use python3 glusterfind failed. +It was not working from 6.0-5 downstream build. + +Fix: revert back to the old mechanism where we check the python version +and redirect the python script according to the usage. 
+ +Change-Id: I8dc6c9185b2740e20e4c4d734cc1a9e335e9c449 +fixes: bz#1719640 +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173392 +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9c7d7a7..0127e8e 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -722,10 +722,12 @@ GlusterFS Events + + %prep + %setup -q -n %{name}-%{version}%{?prereltag} ++%if ( ! %{_usepython3} ) + echo "fixing python shebangs..." +-for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do +- sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i ++for f in api events extras geo-replication libglusterfs tools xlators; do ++find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \; + done ++%endif + + %build + +-- +1.8.3.1 + diff --git a/SOURCES/0191-Build-Update-python-shebangs-based-on-version.patch b/SOURCES/0191-Build-Update-python-shebangs-based-on-version.patch new file mode 100644 index 0000000..0d88b9d --- /dev/null +++ b/SOURCES/0191-Build-Update-python-shebangs-based-on-version.patch @@ -0,0 +1,49 @@ +From 4f471c25dad4d7d51443005108ec53c2d390daf5 Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Fri, 14 Jun 2019 20:20:26 +0530 +Subject: [PATCH 191/192] Build: Update python shebangs based on version + +RHEL 7 uses python2 where as RHEL 8 uses python 3. +Updating the spec file to use appropriate shebangs +to avoid script failures. + +Label : DOWNSTREAM ONLY + +BUG: 1719640 +Change-Id: I075764b6a00ba53a305451e3fc58584facd75a78 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173518 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Hari Gowtham Gopal <hgowtham@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 0127e8e..29e4a37 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -722,11 +722,15 @@ GlusterFS Events + + %prep + %setup -q -n %{name}-%{version}%{?prereltag} +-%if ( ! %{_usepython3} ) + echo "fixing python shebangs..." +-for f in api events extras geo-replication libglusterfs tools xlators; do +-find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \; +-done ++%if ( %{_usepython3} ) ++ for i in `find . 
-type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do ++ sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i ++ done ++%else ++ for f in api events extras geo-replication libglusterfs tools xlators; do ++ find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \; ++ done + %endif + + %build +-- +1.8.3.1 + diff --git a/SOURCES/0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch b/SOURCES/0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch new file mode 100644 index 0000000..e1b2ada --- /dev/null +++ b/SOURCES/0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch @@ -0,0 +1,114 @@ +From d2319a4746ba07ada5b3a20462ec2900e1c03c5a Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Thu, 13 Jun 2019 19:56:32 +0530 +Subject: [PATCH 192/192] build: Ensure gluster-cli package is built as part of + client build + +Till RHGS 3.4.x RHGS client was shipping gluster-cli rpm. With RHGS 3.5 +which is a rebase of glusterfs 6.0 gluster-cli is only built for server. +gluster cli offers a remote cli execution capability with --remote-host +option for which you need not to have cli and glusterd co located and +hence shipping cli as part of the client package is mandatory. With out +this change the client upgrade for RHEL minor versions are also broken. + +>Fixes: bz#1720615 +>Change-Id: I5071f3255ff615113b36b08cd5326be6e37d907d +>Signed-off-by: Niels de Vos <ndevos@redhat.com> + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22868/ + +BUG: 1720079 +Change-Id: I11ec3e2b4d98b3e701147c60ca797d54570d598e +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173388 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + cli/src/Makefile.am | 2 -- + doc/Makefile.am | 4 ++-- + glusterfs.spec.in | 9 +++------ + 3 files changed, 5 insertions(+), 10 deletions(-) + +diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am +index 6be070f..3e7511f 100644 +--- a/cli/src/Makefile.am ++++ b/cli/src/Makefile.am +@@ -1,6 +1,4 @@ +-if WITH_SERVER + sbin_PROGRAMS = gluster +-endif + + gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c cli-cmd-global.c \ + cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\ +diff --git a/doc/Makefile.am b/doc/Makefile.am +index 7c04d74..9904767 100644 +--- a/doc/Makefile.am ++++ b/doc/Makefile.am +@@ -1,9 +1,9 @@ + EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \ + glusterd.8 glusterfsd.8 + +-man8_MANS = glusterfs.8 mount.glusterfs.8 ++man8_MANS = gluster.8 glusterfs.8 mount.glusterfs.8 + if WITH_SERVER +-man8_MANS += gluster.8 glusterd.8 glusterfsd.8 ++man8_MANS += glusterd.8 glusterfsd.8 + endif + + CLEANFILES = +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 29e4a37..c505cd9 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -353,7 +353,6 @@ is in user space and easily manageable. + + This package provides the api include files. + +-%if ( 0%{!?_without_server:1} ) + %package cli + Summary: GlusterFS CLI + Requires: %{name}-libs%{?_isa} = %{version}-%{release} +@@ -368,7 +367,6 @@ called Translators from GNU Hurd kernel. Much of the code in GlusterFS + is in user space and easily manageable. 
+ + This package provides the GlusterFS CLI application and its man page +-%endif + + %package cloudsync-plugins + Summary: Cloudsync Plugins +@@ -891,10 +889,8 @@ touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid + find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs + + ## Install bash completion for cli +-%if ( 0%{!?_without_server:1} ) + install -p -m 0744 -D extras/command-completion/gluster.bash \ + %{buildroot}%{_sysconfdir}/bash_completion.d/gluster +-%endif + + %if ( 0%{!?_without_server:1} ) + echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release +@@ -1193,12 +1189,10 @@ exit 0 + %dir %{_includedir}/glusterfs/api + %{_includedir}/glusterfs/api/* + +-%if ( 0%{!?_without_server:1} ) + %files cli + %{_sbindir}/gluster + %{_mandir}/man8/gluster.8* + %{_sysconfdir}/bash_completion.d/gluster +-%endif + + %files cloudsync-plugins + %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins +@@ -1938,6 +1932,9 @@ fi + %endif + + %changelog ++* Fri Jun 14 2019 Atin Mukherjee <amukherj@redhat.com> ++- Ensure gluster-cli package is part of client build (#1720079) ++ + * Mon May 27 2019 Jiffin Tony Thottan <jthottan@redhat.com> + - Change the dependency to 2.7.3 on nfs-ganesha for glusterfs-ganesha (#1714078) + +-- +1.8.3.1 + diff --git a/SOURCES/0193-spec-fixed-python-dependency-for-rhel6.patch b/SOURCES/0193-spec-fixed-python-dependency-for-rhel6.patch new file mode 100644 index 0000000..6b00b69 --- /dev/null +++ b/SOURCES/0193-spec-fixed-python-dependency-for-rhel6.patch @@ -0,0 +1,42 @@ +From 58bc818f19cbc8e4dd97097dc3e4ec7af8fa8d4a Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Tue, 7 May 2019 05:35:11 +0000 +Subject: [PATCH 193/221] spec: fixed python dependency for rhel6 + +Installing redhat-storage-server was failing with python dependency +for glusterfs-geo-replication package. This patch conditionally sets +the python version for rhel7 and fixes the problem. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1704207 + +Change-Id: Ie3b079fd1ccfa6fd2cbf5b08b7a70bd03f090e01 +fixes: bz#1704207 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/169555 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + glusterfs.spec.in | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index c505cd9..1150101 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -500,7 +500,11 @@ Summary: GlusterFS Geo-replication + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-server%{?_isa} = %{version}-%{release} + Requires: python%{_pythonver} ++%if ( 0%{?rhel} && 0%{?rhel} < 7 ) ++Requires: python-prettytable ++%else + Requires: python%{_pythonver}-prettytable ++%endif + Requires: python%{_pythonver}-gluster = %{version}-%{release} + + Requires: rsync +-- +1.8.3.1 + diff --git a/SOURCES/0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch b/SOURCES/0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch new file mode 100644 index 0000000..7b8371f --- /dev/null +++ b/SOURCES/0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch @@ -0,0 +1,144 @@ +From 783f53b0b09845cd6c38f145eac685a094767ce0 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 27 May 2019 11:43:26 +0530 +Subject: [PATCH 194/221] stack: Make sure to have unique call-stacks in all + cases + +At the moment new stack doesn't populate frame->root->unique in all cases. This +makes it difficult to debug hung frames by examining successive state dumps. +Fuse and server xlators populate it whenever they can, but other xlators won't +be able to assign 'unique' when they need to create a new frame/stack because +they don't know what 'unique' fuse/server xlators already used. What we need is +for unique to be correct. If a stack with same unique is present in successive +statedumps, that means the same operation is still in progress. This makes +'finding hung frames' part of debugging hung frames easier. 
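+
+The id is recorded with each stack in the statedump files (by default
+under /var/run/gluster), so comparing two dumps of the same process and
+looking for unique values present in both is enough to list the
+operations that have not completed.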
+ + >upstream: bz#1714098 + >Upstream-patch: https://review.gluster.org/c/glusterfs/+/22773 +fixes bz#1716760 +Change-Id: I3e9a8f6b4111e260106c48a2ac3a41ef29361b9e +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/172304 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + libglusterfs/src/stack.c | 2 ++ + xlators/features/quota/src/quotad-helpers.c | 3 --- + xlators/mount/fuse/src/fuse-bridge.c | 15 ++++++++------- + xlators/mount/fuse/src/fuse-helpers.c | 1 - + xlators/protocol/server/src/server-helpers.c | 3 --- + 5 files changed, 10 insertions(+), 14 deletions(-) + +diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c +index 82b3577..371f60c 100644 +--- a/libglusterfs/src/stack.c ++++ b/libglusterfs/src/stack.c +@@ -17,6 +17,7 @@ create_frame(xlator_t *xl, call_pool_t *pool) + { + call_stack_t *stack = NULL; + call_frame_t *frame = NULL; ++ static uint64_t unique = 0; + + if (!xl || !pool) { + return NULL; +@@ -52,6 +53,7 @@ create_frame(xlator_t *xl, call_pool_t *pool) + { + list_add(&stack->all_frames, &pool->all_frames); + pool->cnt++; ++ stack->unique = unique++; + } + UNLOCK(&pool->lock); + GF_ATOMIC_INC(pool->total_count); +diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c +index be8f908..d9f0351 100644 +--- a/xlators/features/quota/src/quotad-helpers.c ++++ b/xlators/features/quota/src/quotad-helpers.c +@@ -73,7 +73,6 @@ quotad_aggregator_alloc_frame(rpcsvc_request_t *req) + goto out; + + frame->root->state = state; +- frame->root->unique = 0; + + frame->this = this; + out: +@@ -93,8 +92,6 @@ quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req) + + frame->root->op = req->procnum; + +- frame->root->unique = req->xid; +- + frame->root->uid = req->uid; + frame->root->gid = req->gid; + frame->root->pid = req->pid; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index c3945d7..c05866b 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -3270,11 +3270,11 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg, + + priv = this->private; + +- fuse_log_eh(this, "RELEASE(): %" PRIu64 ":, fd: %p, gfid: %s", finh->unique, +- fd, uuid_utoa(fd->inode->gfid)); ++ fuse_log_eh(this, "RELEASE(): finh->unique: %" PRIu64 ":, fd: %p, gfid: %s", ++ finh->unique, fd, uuid_utoa(fd->inode->gfid)); + +- gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": RELEASE %p", +- finh->unique, state->fd); ++ gf_log("glusterfs-fuse", GF_LOG_TRACE, ++ "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd); + + fuse_fd_ctx_destroy(this, state->fd); + fd_unref(fd); +@@ -3759,11 +3759,12 @@ fuse_releasedir(xlator_t *this, fuse_in_header_t *finh, void *msg, + + priv = this->private; + +- fuse_log_eh(this, "RELEASEDIR (): %" PRIu64 ": fd: %p, gfid: %s", ++ fuse_log_eh(this, ++ "RELEASEDIR (): finh->unique: %" PRIu64 ": fd: %p, gfid: %s", + finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid)); + +- gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": RELEASEDIR %p", +- finh->unique, state->fd); ++ gf_log("glusterfs-fuse", GF_LOG_TRACE, ++ "finh->unique: %" PRIu64 ": RELEASEDIR %p", finh->unique, state->fd); + + fuse_fd_ctx_destroy(this, state->fd); + fd_unref(state->fd); +diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c +index cf4f8e1..5bfc40c 100644 +--- 
a/xlators/mount/fuse/src/fuse-helpers.c ++++ b/xlators/mount/fuse/src/fuse-helpers.c +@@ -358,7 +358,6 @@ get_call_frame_for_req(fuse_state_t *state) + frame->root->uid = finh->uid; + frame->root->gid = finh->gid; + frame->root->pid = finh->pid; +- frame->root->unique = finh->unique; + set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner); + } + +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index 1a34239..e74a24d 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ b/xlators/protocol/server/src/server-helpers.c +@@ -459,7 +459,6 @@ server_alloc_frame(rpcsvc_request_t *req) + + frame->root->client = client; + frame->root->state = state; /* which socket */ +- frame->root->unique = 0; /* which call */ + + frame->this = client->this; + out: +@@ -487,8 +486,6 @@ get_frame_from_request(rpcsvc_request_t *req) + + frame->root->op = req->procnum; + +- frame->root->unique = req->xid; +- + client = req->trans->xl_private; + this = req->trans->xl; + priv = this->private; +-- +1.8.3.1 + diff --git a/SOURCES/0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch b/SOURCES/0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch new file mode 100644 index 0000000..949ebb6 --- /dev/null +++ b/SOURCES/0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch @@ -0,0 +1,89 @@ +From 909a6461c860fffde5f886891dd53752f60eae67 Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Tue, 18 Jun 2019 12:10:55 +0530 +Subject: [PATCH 195/221] build : package glusterfs-ganesha for rhel7 and above + +Label : DOWNSTREAM ONLY + +Change-Id: If845675b18fe055708d905ec566014baf004cb76 +fixes: bz#1720551 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173748 +Reviewed-by: Sreenath Girijan Menon <sgirijan@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +--- + glusterfs.spec.in | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1150101..00603ec 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -302,6 +302,9 @@ Obsoletes: %{name}-ufo + %if ( 0%{!?_with_gnfs:1} ) + Obsoletes: %{name}-gnfs + %endif ++%if ( 0%{?rhel} < 7 ) ++Obsoletes: %{name}-ganesha ++%endif + Provides: %{name}-common = %{version}-%{release} + Provides: %{name}-core = %{version}-%{release} + +@@ -452,7 +455,7 @@ is in user space and easily manageable. + This package provides support to FUSE based clients and inlcudes the + glusterfs(d) binary. 
+ +-%if ( 0%{!?_without_server:1} ) ++%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 ) + %package ganesha + Summary: NFS-Ganesha configuration + Group: Applications/File +@@ -855,7 +858,7 @@ install -D -p -m 0644 extras/glusterfs-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs + + # ganesha ghosts +-%if ( 0%{!?_without_server:1} ) ++%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 ) + mkdir -p %{buildroot}%{_sysconfdir}/ganesha + touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf + mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ +@@ -1165,11 +1168,14 @@ exit 0 + %endif + %endif + +-%if ( 0%{?_without_server:1} ) +-#exclude ganesha related files ++%if ( 0%{?_without_server:1} || 0%{?rhel} < 7 ) ++#exclude ganesha related files for rhel 6 and client builds + %exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample + %exclude %{_libexecdir}/ganesha/* + %exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* ++%if ( 0%{!?_without_server:1} ) ++%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh ++%endif + %endif + + %exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh +@@ -1324,7 +1330,7 @@ exit 0 + %exclude %{_datadir}/glusterfs/tests/vagrant + %endif + +-%if ( 0%{!?_without_server:1} ) ++%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 ) + %files ganesha + %dir %{_libexecdir}/ganesha + %{_sysconfdir}/ganesha/ganesha-ha.conf.sample +@@ -1936,6 +1942,9 @@ fi + %endif + + %changelog ++* Tue Jun 18 2019 Jiffin Tony Thottan <jthottan@redhat.com> ++- build glusterfs-ganesha for rhel 7 and above (#1720551) ++ + * Fri Jun 14 2019 Atin Mukherjee <amukherj@redhat.com> + - Ensure gluster-cli package is part of client build (#1720079) + +-- +1.8.3.1 + diff --git a/SOURCES/0196-posix-ctime-Fix-ctime-upgrade-issue.patch b/SOURCES/0196-posix-ctime-Fix-ctime-upgrade-issue.patch new file mode 100644 index 0000000..1a7b68d --- /dev/null +++ b/SOURCES/0196-posix-ctime-Fix-ctime-upgrade-issue.patch @@ -0,0 +1,384 @@ +From 584ee2dbb8158ee3d3c3f055f1b06ff3d9177192 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Thu, 13 Jun 2019 16:23:21 +0530 +Subject: [PATCH 196/221] posix/ctime: Fix ctime upgrade issue + +Problem: +On a EC volume, during upgrade from the older version where +ctime feature is not enabled(or not present) to the newer +version where the ctime feature is available (enabled default), +the self heal hangs and doesn't complete. + +Cause: +The ctime feature has both client side code (utime) and +server side code (posix). The feature is driven from client. +Only if the client side sets the time in the frame, should +the server side sets the time attributes in xattr. But posix +setattr/fseattr was not doing that. When one of the server +nodes is updated, since ctime is enabled by default, it +starts setting xattr on setattr/fseattr on the updated node/brick. + +On a EC volume the first two updated nodes(bricks) are not a +problem because there are 4 other bricks with consistent data. +However once the third brick is updated, the new attribute(mdata xattr) +will cause an inconsistency on metadata on 3 bricks, which +prevents the file to be repaired. + +Fix: +Don't create mdata xattr with utimes/utimensat system call. +Only update if already present. 
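+
+Concretely, on e.g. a 4+2 volume with three of the six bricks upgraded,
+a utimes call from an old client would create the
+trusted.glusterfs.mdata xattr on the three new bricks and leave it
+absent on the three old ones; neither half agrees with the other, so
+self-heal cannot pick a good copy until the xattr is only updated, not
+created, on this path.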
+ +Backport of: + > Patch: https://review.gluster.org/22858 + > Change-Id: Ieacedecb8a738bb437283ef3e0f042fd49dc4c8c + > fixes: bz#1720201 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: Ieacedecb8a738bb437283ef3e0f042fd49dc4c8c +BUG: 1713664 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174238 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/afr/split-brain-healing.t | 36 ++++--- + tests/utils/get-mdata-xattr.c | 152 +++++++++++++++++++++++++++++ + tests/volume.rc | 30 ++++++ + xlators/storage/posix/src/posix-metadata.c | 21 ++++ + 4 files changed, 223 insertions(+), 16 deletions(-) + create mode 100644 tests/utils/get-mdata-xattr.c + +diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t +index c80f900..78553e6 100644 +--- a/tests/basic/afr/split-brain-healing.t ++++ b/tests/basic/afr/split-brain-healing.t +@@ -20,11 +20,14 @@ function get_replicate_subvol_number { + cleanup; + + AREQUAL_PATH=$(dirname $0)/../../utils ++GET_MDATA_PATH=$(dirname $0)/../../utils + CFLAGS="" + test "`uname -s`" != "Linux" && { + CFLAGS="$CFLAGS -lintl"; + } + build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS ++build_tester $GET_MDATA_PATH/get-mdata-xattr.c ++ + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} +@@ -152,13 +155,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4 + subvolume=$(get_replicate_subvol_number file5) + if [ $subvolume == 0 ] + then +- mtime1=$(stat -c %Y $B0/${V0}1/file5) +- mtime2=$(stat -c %Y $B0/${V0}2/file5) ++ mtime1=$(get_mtime $B0/${V0}1/file5) ++ mtime2=$(get_mtime $B0/${V0}2/file5) + LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) + elif [ $subvolume == 1 ] + then +- mtime1=$(stat -c %Y $B0/${V0}3/file5) +- mtime2=$(stat -c %Y $B0/${V0}4/file5) ++ mtime1=$(get_mtime $B0/${V0}3/file5) ++ mtime2=$(get_mtime $B0/${V0}4/file5) + LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) + fi + $CLI volume heal $V0 split-brain latest-mtime /file5 +@@ -166,12 +169,12 @@ EXPECT "0" echo $? + + if [ $subvolume == 0 ] + then +- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5) +- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5) ++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) ++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5) + elif [ $subvolume == 1 ] + then +- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5) +- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5) ++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) ++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5) + fi + + #TODO: To below comparisons on full sub-second resolution +@@ -188,14 +191,14 @@ subvolume=$(get_replicate_subvol_number file6) + if [ $subvolume == 0 ] + then + GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) +- mtime1=$(stat -c %Y $B0/${V0}1/file6) +- mtime2=$(stat -c %Y $B0/${V0}2/file6) ++ mtime1=$(get_mtime $B0/${V0}1/file6) ++ mtime2=$(get_mtime $B0/${V0}2/file6) + LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) + elif [ $subvolume == 1 ] + then + GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) +- mtime1=$(stat -c %Y $B0/${V0}3/file6) +- mtime2=$(stat -c %Y $B0/${V0}4/file6) ++ mtime1=$(get_mtime $B0/${V0}3/file6) ++ mtime2=$(get_mtime $B0/${V0}4/file6) + LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) + fi + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" +@@ -204,12 +207,12 @@ EXPECT "0" echo $? 
+ + if [ $subvolume == 0 ] + then +- mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6) +- mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6) ++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) ++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6) + elif [ $subvolume == 1 ] + then +- mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6) +- mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6) ++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) ++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6) + fi + + #TODO: To below comparisons on full sub-second resolution +@@ -253,4 +256,5 @@ EXPECT "1" echo $? + + cd - + TEST rm $AREQUAL_PATH/arequal-checksum ++TEST rm $GET_MDATA_PATH/get-mdata-xattr + cleanup +diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c +new file mode 100644 +index 0000000..e9f5471 +--- /dev/null ++++ b/tests/utils/get-mdata-xattr.c +@@ -0,0 +1,152 @@ ++/* ++ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include <stdlib.h> ++#include <endian.h> ++#include <stdio.h> ++#include <time.h> ++#include <string.h> ++#include <inttypes.h> ++#include <sys/types.h> ++#include <sys/xattr.h> ++#include <errno.h> ++ ++typedef struct gf_timespec_disk { ++ uint64_t tv_sec; ++ uint64_t tv_nsec; ++} gf_timespec_disk_t; ++ ++/* posix_mdata_t on disk structure */ ++typedef struct __attribute__((__packed__)) posix_mdata_disk { ++ /* version of structure, bumped up if any new member is added */ ++ uint8_t version; ++ /* flags indicates valid fields in the structure */ ++ uint64_t flags; ++ gf_timespec_disk_t ctime; ++ gf_timespec_disk_t mtime; ++ gf_timespec_disk_t atime; ++} posix_mdata_disk_t; ++ ++/* In memory representation posix metadata xattr */ ++typedef struct { ++ /* version of structure, bumped up if any new member is added */ ++ uint8_t version; ++ /* flags indicates valid fields in the structure */ ++ uint64_t flags; ++ struct timespec ctime; ++ struct timespec mtime; ++ struct timespec atime; ++} posix_mdata_t; ++ ++#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata" ++ ++/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order ++ */ ++static inline void ++posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) ++{ ++ out->version = in->version; ++ out->flags = be64toh(in->flags); ++ ++ out->ctime.tv_sec = be64toh(in->ctime.tv_sec); ++ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec); ++ ++ out->mtime.tv_sec = be64toh(in->mtime.tv_sec); ++ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec); ++ ++ out->atime.tv_sec = be64toh(in->atime.tv_sec); ++ out->atime.tv_nsec = be64toh(in->atime.tv_nsec); ++} ++ ++/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ ++static int ++posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata) ++{ ++ size_t size = -1; ++ char *value = NULL; ++ char gfid_str[64] = {0}; ++ ++ char *key = GF_XATTR_MDATA_KEY; ++ ++ if (!metadata || !real_path) { ++ goto err; ++ } ++ ++ /* Get size */ ++ size = lgetxattr(real_path, key, NULL, 0); ++ if (size == -1) { ++ goto err; ++ } ++ ++ value = calloc(size + 1, sizeof(char)); ++ if (!value) { ++ goto err; ++ } ++ ++ /* Get xattr value */ ++ size = lgetxattr(real_path, key, value, size); ++ if (size == -1) { ++ goto err; ++ } ++ 
posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); ++ ++out: ++ if (value) ++ free(value); ++ return 0; ++err: ++ if (value) ++ free(value); ++ return -1; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ posix_mdata_t metadata; ++ uint64_t result; ++ ++ if (argc != 3) { ++ /* ++ Usage: get_mdata_xattr -c|-m|-a <file-name> ++ where -c --> ctime ++ -m --> mtime ++ -a --> atime ++ */ ++ printf("-1"); ++ goto err; ++ } ++ ++ if (posix_fetch_mdata_xattr(argv[2], &metadata)) { ++ printf("-1"); ++ goto err; ++ } ++ ++ switch (argv[1][1]) { ++ case 'c': ++ result = metadata.ctime.tv_sec; ++ break; ++ case 'm': ++ result = metadata.mtime.tv_sec; ++ break; ++ case 'a': ++ result = metadata.atime.tv_sec; ++ break; ++ default: ++ printf("-1"); ++ goto err; ++ } ++ printf("%" PRIu64, result); ++ fflush(stdout); ++ return 0; ++err: ++ fflush(stdout); ++ return -1; ++} +diff --git a/tests/volume.rc b/tests/volume.rc +index bb400cc..6a78c37 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -927,3 +927,33 @@ function number_healer_threads_shd { + local pid=$(get_shd_mux_pid $1) + pstack $pid | grep $2 | wc -l + } ++ ++function get_mtime { ++ local time=$(get-mdata-xattr -m $1) ++ if [ $time == "-1" ]; ++ then ++ echo $(stat -c %Y $1) ++ else ++ echo $time ++ fi ++} ++ ++function get_ctime { ++ local time=$(get-mdata-xattr -c $1) ++ if [ $time == "-1" ]; ++ then ++ echo $(stat -c %Z $2) ++ else ++ echo $time ++ fi ++} ++ ++function get_atime { ++ local time=$(get-mdata-xattr -a $1) ++ if [ $time == "-1" ]; ++ then ++ echo $(stat -c %X $1) ++ else ++ echo $time ++ fi ++} +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index e96f222..5a5e6cd 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -416,6 +416,22 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + * still fine as the times would get eventually + * accurate. + */ ++ ++ /* Don't create xattr with utimes/utimensat, only update if ++ * present. This otherwise causes issues during inservice ++ * upgrade. It causes inconsistent xattr values with in replica ++ * set. The scenario happens during upgrade where clients are ++ * older versions (without the ctime feature) and the server is ++ * upgraded to the new version (with the ctime feature which ++ * is enabled by default). ++ */ ++ ++ if (update_utime) { ++ UNLOCK(&inode->lock); ++ GF_FREE(mdata); ++ return 0; ++ } ++ + mdata->version = 1; + mdata->flags = 0; + mdata->ctime.tv_sec = time->tv_sec; +@@ -527,6 +543,11 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + + priv = this->private; + ++ /* NOTE: ++ * This routine (utimes) is intentionally allowed for all internal and ++ * external clients even if ctime is not set. 
This is because AFR and ++ * WORM uses time attributes for it's internal operations ++ */ + if (inode && priv->ctime) { + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv.tv_sec = stbuf->ia_atime; +-- +1.8.3.1 + diff --git a/SOURCES/0197-posix-fix-crash-in-posix_cs_set_state.patch b/SOURCES/0197-posix-fix-crash-in-posix_cs_set_state.patch new file mode 100644 index 0000000..c17e6c2 --- /dev/null +++ b/SOURCES/0197-posix-fix-crash-in-posix_cs_set_state.patch @@ -0,0 +1,71 @@ +From 58070aa568ffbaac267b02428e974b2459ae13b0 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Tue, 18 Jun 2019 16:43:43 +0530 +Subject: [PATCH 197/221] :posix: fix crash in posix_cs_set_state + +> Fixes: bz#1721474 +> Change-Id: Ic2a53fa3d1e9e23424c6898e0986f80d52c5e3f6 +> Signed-off-by: Susant Palai <spalai@redhat.com> +(cherry-pick of https://review.gluster.org/#/c/glusterfs/+/22892/) + +BUG: 1721477 +Change-Id: Ic2a53fa3d1e9e23424c6898e0986f80d52c5e3f6 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173936 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/storage/posix/src/posix-helpers.c | 5 +++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 7 ++++--- + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index aecf4f8..849db3d 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -3235,6 +3235,11 @@ posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state, + char *value = NULL; + size_t xattrsize = 0; + ++ if (!rsp) { ++ ret = -1; ++ goto out; ++ } ++ + if (!(*rsp)) { + *rsp = dict_new(); + if (!(*rsp)) { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 7ca4d26..b92c411 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1028,6 +1028,7 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt statpost = { + 0, + }; ++ dict_t *rsp_xdata = NULL; + + #ifdef FALLOC_FL_KEEP_SIZE + if (keep_size) +@@ -1035,15 +1036,15 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + #endif /* FALLOC_FL_KEEP_SIZE */ + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, +- &statpost, xdata, NULL); ++ &statpost, xdata, &rsp_xdata); + if (ret < 0) + goto err; + +- STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); ++ STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + + err: +- STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); ++ STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata); + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0198-cluster-ec-Prevent-double-pre-op-xattrops.patch b/SOURCES/0198-cluster-ec-Prevent-double-pre-op-xattrops.patch new file mode 100644 index 0000000..5e7c272 --- /dev/null +++ b/SOURCES/0198-cluster-ec-Prevent-double-pre-op-xattrops.patch @@ -0,0 +1,119 @@ +From 9912a432dc3493007462f76c5933d04a160814ae Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Thu, 20 Jun 2019 17:05:49 +0530 +Subject: [PATCH 198/221] cluster/ec: Prevent double pre-op xattrops + +Problem: +Race: +Thread-1 Thread-2 +1) Does ec_get_size_version() to perform +pre-op fxattrop as part of write-1 
+ 2) Calls ec_set_dirty_flag() in + ec_get_size_version() for write-2. + This sets dirty[] to 1 +3) Completes executing +ec_prepare_update_cbk leading to +ctx->dirty[] = '1' + 4) Takes LOCK(inode->lock) to check if there are + any flags and sets dirty-flag because + lock->waiting_flag is 0 now. This leads to + fxattrop to increment on-disk dirty[] to '2' + +At the end of the writes the file will be marked for heal even when it doesn't need heal. + +Fix: +Perform ec_set_dirty_flag() and other checks inside LOCK() to prevent dirty[] to be marked +as '1' in step 2) above + + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/22907 + +fixes: bz#1600918 +Change-Id: Icac2ab39c0b1e7e154387800fbededc561612865 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174385 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/ec/ec-dirty-flags.t | 23 +++++++++++++++++++++++ + xlators/cluster/ec/src/ec-common.c | 13 +++++++------ + 2 files changed, 30 insertions(+), 6 deletions(-) + create mode 100644 tests/basic/ec/ec-dirty-flags.t + +diff --git a/tests/basic/ec/ec-dirty-flags.t b/tests/basic/ec/ec-dirty-flags.t +new file mode 100644 +index 0000000..68e6610 +--- /dev/null ++++ b/tests/basic/ec/ec-dirty-flags.t +@@ -0,0 +1,23 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# This checks if the fop keeps the dirty flags settings correctly after ++# finishing the fop. ++ ++cleanup ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++cd $M0 ++for i in {1..1000}; do dd if=/dev/zero of=file-${i} bs=512k count=2; done ++cd - ++EXPECT "^0$" get_pending_heal_count $V0 ++ ++cleanup +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 9cc6395..35c2256 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -1405,6 +1405,10 @@ ec_get_size_version(ec_lock_link_t *link) + !ec_is_data_fop(fop->id)) + link->optimistic_changelog = _gf_true; + ++ memset(&loc, 0, sizeof(loc)); ++ ++ LOCK(&lock->loc.inode->lock); ++ + set_dirty = ec_set_dirty_flag(link, ctx, dirty); + + /* If ec metadata has already been retrieved, do not try again. */ +@@ -1412,20 +1416,16 @@ ec_get_size_version(ec_lock_link_t *link) + if (ec_is_data_fop(fop->id)) { + fop->healing |= lock->healing; + } +- return; ++ goto unlock; + } + + /* Determine if there's something we need to retrieve for the current + * operation. 
*/ + if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) && + (lock->loc.inode->ia_type != IA_INVAL)) { +- return; ++ goto unlock; + } + +- memset(&loc, 0, sizeof(loc)); +- +- LOCK(&lock->loc.inode->lock); +- + changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty); + if (link->waiting_flags) { + /* This fop needs to wait until all its flags are cleared which +@@ -1436,6 +1436,7 @@ ec_get_size_version(ec_lock_link_t *link) + GF_ASSERT(!changed_flags); + } + ++unlock: + UNLOCK(&lock->loc.inode->lock); + + if (!changed_flags) +-- +1.8.3.1 + diff --git a/SOURCES/0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch b/SOURCES/0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch new file mode 100644 index 0000000..161675e --- /dev/null +++ b/SOURCES/0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch @@ -0,0 +1,80 @@ +From e41b4a45f9f5c07ffa38582d0bb4517f6a66eaa3 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Fri, 7 Jun 2019 19:33:07 +0530 +Subject: [PATCH 199/221] upcall: Avoid sending notifications for invalid + inodes + +For nameless LOOKUPs, server creates a new inode which shall +remain invalid until the fop is successfully processed post +which it is linked to the inode table. + +But incase if there is an already linked inode for that entry, +it discards that newly created inode which results in upcall +notification. This may result in client being bombarded with +unnecessary upcalls affecting performance if the data set is huge. + +This issue can be avoided by looking up and storing the upcall +context in the original linked inode (if exists), thus saving up on +those extra callbacks. + +This is backport of below upstream fix - +mainline: https://review.gluster.org/22840 +release-6: https://review.gluster.org/22873 + +Change-Id: I044a1737819bb40d1a049d2f53c0566e746d2a17 +fixes: bz#1717784 +Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173507 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +--- + xlators/features/upcall/src/upcall-internal.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c +index 46cf6f8..7998dd2 100644 +--- a/xlators/features/upcall/src/upcall-internal.c ++++ b/xlators/features/upcall/src/upcall-internal.c +@@ -520,6 +520,7 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, + upcall_client_t *tmp = NULL; + upcall_inode_ctx_t *up_inode_ctx = NULL; + gf_boolean_t found = _gf_false; ++ inode_t *linked_inode = NULL; + + if (!is_upcall_enabled(this)) + return; +@@ -532,7 +533,20 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, + return; + } + +- if (inode) ++ /* For nameless LOOKUPs, inode created shall always be ++ * invalid. Hence check if there is any already linked inode. 
++ * If yes, update the inode_ctx of that valid inode ++ */ ++ if (inode && (inode->ia_type == IA_INVAL) && stbuf) { ++ linked_inode = inode_find(inode->table, stbuf->ia_gfid); ++ if (linked_inode) { ++ gf_log("upcall", GF_LOG_DEBUG, ++ "upcall_inode_ctx_get of linked inode (%p)", inode); ++ up_inode_ctx = upcall_inode_ctx_get(linked_inode, this); ++ } ++ } ++ ++ if (inode && !up_inode_ctx) + up_inode_ctx = upcall_inode_ctx_get(inode, this); + + if (!up_inode_ctx) { +@@ -600,6 +614,9 @@ upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client, + } + pthread_mutex_unlock(&up_inode_ctx->client_list_lock); + out: ++ /* release the ref from inode_find */ ++ if (linked_inode) ++ inode_unref(linked_inode); + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch b/SOURCES/0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch new file mode 100644 index 0000000..ffef4d4 --- /dev/null +++ b/SOURCES/0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch @@ -0,0 +1,206 @@ +From bd553499909d2d57fd05696dc7604901cef3a36a Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Fri, 7 Jun 2019 17:20:15 +0530 +Subject: [PATCH 200/221] gfapi: fix incorrect initialization of upcall syncop + arguments + +While sending upcall notifications via synctasks, the argument used to +carry relevant data for these tasks is not initialized properly. This patch +is to fix the same. + +This is backport of below upstream fix - +mainline: https://review.gluster.org/22839 +release-6: https://review.gluster.org/22871 + +Change-Id: I9fa8f841e71d3c37d3819fbd430382928c07176c +fixes: bz#1717784 +Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173508 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Kaleb Keithley <kkeithle@redhat.com> +--- + api/src/glfs-fops.c | 109 ++++++++++++++++++++++++++++++++++------------------ + 1 file changed, 72 insertions(+), 37 deletions(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 01ba60b..396f18c 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -34,7 +34,7 @@ + + struct upcall_syncop_args { + struct glfs *fs; +- struct gf_upcall *upcall_data; ++ struct glfs_upcall *up_arg; + }; + + #define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1) +@@ -5714,12 +5714,28 @@ out: + } + + static int ++upcall_syncop_args_free(struct upcall_syncop_args *args) ++{ ++ if (args && args->up_arg) ++ GLFS_FREE(args->up_arg); ++ GF_FREE(args); ++ return 0; ++} ++ ++static int + glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque) + { + struct upcall_syncop_args *args = opaque; + +- GF_FREE(args->upcall_data); +- GF_FREE(args); ++ /* Here we not using upcall_syncop_args_free as application ++ * will be cleaning up the args->up_arg using glfs_free ++ * post processing upcall. 
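++ * Only on error (ret != 0) does the callback free up_arg itself,
++ * since in that case the application never receives it.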
++ */ ++ if (ret) { ++ upcall_syncop_args_free(args); ++ } else ++ GF_FREE(args); ++ + return 0; + } + +@@ -5727,13 +5743,29 @@ static int + glfs_cbk_upcall_syncop(void *opaque) + { + struct upcall_syncop_args *args = opaque; +- int ret = -1; + struct glfs_upcall *up_arg = NULL; + struct glfs *fs; +- struct gf_upcall *upcall_data; + + fs = args->fs; +- upcall_data = args->upcall_data; ++ up_arg = args->up_arg; ++ ++ if (fs->up_cbk && up_arg) { ++ (fs->up_cbk)(up_arg, fs->up_data); ++ return 0; ++ } ++ ++ return -1; ++} ++ ++static struct upcall_syncop_args * ++upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data) ++{ ++ struct upcall_syncop_args *args = NULL; ++ int ret = -1; ++ struct glfs_upcall *up_arg = NULL; ++ ++ if (!fs || !upcall_data) ++ goto out; + + up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall, + glfs_mt_upcall_entry_t); +@@ -5754,33 +5786,51 @@ glfs_cbk_upcall_syncop(void *opaque) + errno = EINVAL; + } + +- if (!ret && (up_arg->reason != GLFS_UPCALL_EVENT_NULL)) { +- /* It could so happen that the file which got +- * upcall notification may have got deleted by +- * the same client. In such cases up_arg->reason +- * is set to GLFS_UPCALL_EVENT_NULL. No need to +- * send upcall then */ +- (fs->up_cbk)(up_arg, fs->up_data); +- } else if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) { ++ /* It could so happen that the file which got ++ * upcall notification may have got deleted by ++ * the same client. In such cases up_arg->reason ++ * is set to GLFS_UPCALL_EVENT_NULL. No need to ++ * send upcall then ++ */ ++ if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) { + gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_INVALID_ENTRY, + "Upcall_EVENT_NULL received. Skipping it."); + goto out; +- } else { ++ } else if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, + "Upcall entry validation failed."); + goto out; + } + ++ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args), ++ glfs_mt_upcall_entry_t); ++ if (!args) { ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, ++ "Upcall syncop args allocation failed."); ++ goto out; ++ } ++ ++ /* Note: we are not taking any ref on fs here. ++ * Ideally applications have to unregister for upcall events ++ * or stop polling for upcall events before performing ++ * glfs_fini. And as for outstanding synctasks created, we wait ++ * for all syncenv threads to finish tasks before cleaning up the ++ * fs->ctx. Hence it seems safe to process these callback ++ * notification without taking any lock/ref. ++ */ ++ args->fs = fs; ++ args->up_arg = up_arg; ++ + /* application takes care of calling glfs_free on up_arg post + * their processing */ +- ret = 0; + ++ return args; + out: +- if (ret && up_arg) { ++ if (up_arg) { + GLFS_FREE(up_arg); + } + +- return 0; ++ return NULL; + } + + static void +@@ -5797,24 +5847,10 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + goto out; + } + +- args = GF_CALLOC(1, sizeof(struct upcall_syncop_args), +- glfs_mt_upcall_entry_t); +- if (!args) { +- gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, +- "Upcall syncop args allocation failed."); +- goto out; +- } ++ args = upcall_syncop_args_init(fs, upcall_data); + +- /* Note: we are not taking any ref on fs here. +- * Ideally applications have to unregister for upcall events +- * or stop polling for upcall events before performing +- * glfs_fini. 
And as for outstanding synctasks created, we wait +- * for all syncenv threads to finish tasks before cleaning up the +- * fs->ctx. Hence it seems safe to process these callback +- * notification without taking any lock/ref. +- */ +- args->fs = fs; +- args->upcall_data = gf_memdup(upcall_data, sizeof(*upcall_data)); ++ if (!args) ++ goto out; + + ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop, + glfs_upcall_syncop_cbk, NULL, args); +@@ -5823,8 +5859,7 @@ glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED, + "Synctak for Upcall event_type(%d) and gfid(%s) failed", + upcall_data->event_type, (char *)(upcall_data->gfid)); +- GF_FREE(args->upcall_data); +- GF_FREE(args); ++ upcall_syncop_args_free(args); + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch b/SOURCES/0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch new file mode 100644 index 0000000..0884a87 --- /dev/null +++ b/SOURCES/0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch @@ -0,0 +1,44 @@ +From a61c2a81e5731e4e0b5136147f404e60d3c72ad0 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Tue, 18 Jun 2019 16:25:35 +0530 +Subject: [PATCH 201/221] geo-rep: Fix permissions for GEOREP_DIR in non-root + setup + +During mountbroker setup: 'gluster-mountbroker <mountbroker-root> <group>' +commad to set the permission and group for GEOREP_DIR directory +(/var/lib/glusterd/geo-replication) fails due to extra argument, which is +enssential for non-root geo-rep setup. + +Backport of: + +>Updtream patch: https://review.gluster.org/#/c/glusterfs/+/22890/ +>fixes: bz#1721441 +>Change-Id: Ia83442733bf0b29f630e8c9e398097316efca092 +>Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: bz#1722331 +Change-Id: Ia83442733bf0b29f630e8c9e398097316efca092 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174169 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/src/peer_mountbroker.py.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in +index ce33f97..96a7264 100644 +--- a/geo-replication/src/peer_mountbroker.py.in ++++ b/geo-replication/src/peer_mountbroker.py.in +@@ -197,7 +197,7 @@ class NodeSetup(Cmd): + execute(["chgrp", "-R", args.group, GEOREP_DIR]) + execute(["chgrp", "-R", args.group, LOG_DIR]) + execute(["chgrp", args.group, CLI_LOG]) +- execute(["chmod", "770", args.group, GEOREP_DIR]) ++ execute(["chmod", "770", GEOREP_DIR]) + execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}", + "+"]) + execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}", +-- +1.8.3.1 + diff --git a/SOURCES/0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch b/SOURCES/0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch new file mode 100644 index 0000000..7cadb24 --- /dev/null +++ b/SOURCES/0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch @@ -0,0 +1,46 @@ +From e386fb4f4baf834e6a8fc25cc2fbbb17eb0a7a56 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 20 Jun 2019 20:43:24 +0530 +Subject: [PATCH 202/221] shd/mux: Fix race between mux_proc unlink and stop + +There is a small race window, where we have a shd proc +without having a connection. 
That is when we stopped the +last shd running on a process. The list was removed +outside of a lock just after stopping the process. + +So there is a window where we stopped the process, but +the shd proc list contains the entry. + +Backport of: https://review.gluster.org/22909 + +>Change-Id: Id82a82509e5cd72acac24e8b7b87197626525441 +>fixes: bz#1722541 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I794131ede23f32fcfa5f71181149d8c1e7e439b8 +BUG: 1721802 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174541 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index d81d760..dbe2560 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -694,6 +694,9 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) + gf_is_service_running(svc->proc.pidfile, &pid); + cds_list_del_init(&svc->mux_svc); + empty = cds_list_empty(&svc_proc->svcs); ++ if (empty) { ++ cds_list_del_init(&svc_proc->svc_proc_list); ++ } + } + pthread_mutex_unlock(&conf->attach_lock); + if (empty) { +-- +1.8.3.1 + diff --git a/SOURCES/0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch b/SOURCES/0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch new file mode 100644 index 0000000..39c9cd8 --- /dev/null +++ b/SOURCES/0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch @@ -0,0 +1,233 @@ +From 541e1400ecaec5fea0f56e8ca18f00c229906d8a Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Tue, 18 Jun 2019 22:15:37 +0530 +Subject: [PATCH 203/221] glusterd/shd: Change shd logfile to a unique name + +With the shd mux changes, shd was havinga a logfile +with volname of the first started volume. + +This was creating a lot confusion, as other volumes data +is also logging to a logfile which has a different vol name. + +With this changes the logfile will be changed to a unique name +ie "/var/log/glusterfs/glustershd.log". 
This was the same +logfile name before the shd mux + +Backport of: https://review.gluster.org/22895 + +>Change-Id: I2b94c1f0b2cf3c9493505dddf873687755a46dda +>fixes: bz#1721601 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: Ia659386dd19f533fbadaf5a9d5453c9ef2acac64 +BUG: 1721351 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174542 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 12 -------- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 6 ---- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++----- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 34 +++++++++++++++++----- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 4 +++ + 6 files changed, 40 insertions(+), 34 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +index 9196758..57ceda9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +@@ -75,18 +75,6 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + } + + void +-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len) +-{ +- snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname); +-} +- +-void +-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len) +-{ +- snprintf(logfile, len, "%s/shd.log", logdir); +-} +- +-void + glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) + { + glusterd_svc_proc_t *svc_proc = NULL; +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +index c70702c..59466ec 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +@@ -27,12 +27,6 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + int path_len); + + void +-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len); +- +-void +-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len); +- +-void + glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd); + + int +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index dbe2560..8ad90a9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -90,8 +90,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); + glusterd_svc_create_rundir(rundir); + +- glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir)); +- glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile)); ++ glusterd_svc_build_logfile_path(shd_svc_name, DEFAULT_LOG_FILE_DIRECTORY, ++ logfile, sizeof(logfile)); + + /* Initialize the connection mgmt */ + if (mux_conn && mux_svc->rpc) { +@@ -104,7 +104,7 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + if (ret < 0) + goto out; + } else { +- ret = mkdir_p(logdir, 0755, _gf_true); ++ ret = mkdir_p(DEFAULT_LOG_FILE_DIRECTORY, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create logdir %s", logdir); +@@ -460,6 +460,7 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + 
return -1; + + glusterd_volinfo_ref(volinfo); ++ + if (!svc->inited) { + ret = glusterd_shd_svc_mux_init(volinfo, svc); + if (ret) +@@ -471,12 +472,11 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + /* Unref will happen from glusterd_svc_attach_cbk */ + ret = glusterd_attach_svc(svc, volinfo, flags); + if (ret) { +- glusterd_volinfo_unref(volinfo); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to attach shd svc(volume=%s) to pid=%d. Starting" +- "a new process", ++ "Failed to attach shd svc(volume=%s) to pid=%d", + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags); ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); ++ glusterd_volinfo_unref(volinfo); + } + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index a6e662f..400826f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -469,6 +469,9 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + glusterd_conf_t *conf = NULL; + glusterd_svc_t *parent_svc = NULL; + int pid = -1; ++ char pidfile[PATH_MAX] = { ++ 0, ++ }; + + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); +@@ -478,8 +481,26 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + + pthread_mutex_lock(&conf->attach_lock); + { ++ if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) { ++ /* This is the case when shd process was abnormally killed */ ++ pthread_mutex_unlock(&conf->attach_lock); ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); ++ pthread_mutex_lock(&conf->attach_lock); ++ } ++ + if (!svc->inited) { +- if (gf_is_service_running(svc->proc.pidfile, &pid)) { ++ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile)); ++ ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s", ++ "glustershd"); ++ if (ret < 0) ++ goto unlock; ++ ++ ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s", ++ pidfile); ++ if (ret < 0) ++ goto unlock; ++ ++ if (gf_is_service_running(pidfile, &pid)) { + /* Just connect is required, but we don't know what happens + * during the disconnect. So better to reattach. + */ +@@ -487,10 +508,10 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + } + + if (!mux_proc) { +- if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { ++ if (pid != -1 && sys_access(pidfile, R_OK) == 0) { + /* stale pid file, stop and unlink it */ + glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE); +- glusterd_unlink_file(svc->proc.pidfile); ++ glusterd_unlink_file(pidfile); + } + mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); + } +@@ -684,11 +705,10 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, +- "svc %s of volume %s failed to " +- "attach to pid %d. 
Starting a new process", +- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ "svc %s of volume %s failed to attach to pid %d", svc->name, ++ volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + if (!strcmp(svc->name, "glustershd")) { +- glusterd_recover_shd_attach_failure(volinfo, svc, *flag); ++ glusterd_shd_svcproc_cleanup(&volinfo->shd); + } + } + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +index f32dafc..fa316a6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +@@ -33,14 +33,14 @@ glusterd_svc_create_rundir(char *rundir) + return ret; + } + +-static void ++void + glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile, + size_t len) + { + snprintf(logfile, len, "%s/%s.log", logdir, server); + } + +-static void ++void + glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len) + { + snprintf(volfileid, len, "gluster/%s", server); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +index fbc5225..5a5466a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +@@ -74,6 +74,10 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + size_t len); + + void ++glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile, ++ size_t len); ++ ++void + glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len); + + void +-- +1.8.3.1 + diff --git a/SOURCES/0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch b/SOURCES/0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch new file mode 100644 index 0000000..6d05a0b --- /dev/null +++ b/SOURCES/0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch @@ -0,0 +1,60 @@ +From 4d0b11088c4a3a630d71acf902064d1ed10412e8 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Tue, 25 Jun 2019 11:11:10 +0530 +Subject: [PATCH 204/221] glusterd: conditionally clear txn_opinfo in stage op + +...otherwise this leads to a crash when volume status is run on a +heterogeneous mode. 
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22939/ + +>Fixes: bz#1723658 +>Change-Id: I0d39f412b2e5e9d3ef0a3462b90b38bb5364b09d +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1722131 +Change-Id: I0d39f412b2e5e9d3ef0a3462b90b38bb5364b09d +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174566 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index d0c1a2c..9ea695e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -5714,9 +5714,14 @@ glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx) + glusterd_op_info_t txn_op_info = { + {0}, + }; ++ glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); ++ ++ priv = this->private; ++ GF_ASSERT(priv); ++ + GF_ASSERT(ctx); + + req_ctx = ctx; +@@ -5768,9 +5773,12 @@ out: + gf_msg_debug(this->name, 0, "Returning with %d", ret); + + /* for no volname transactions, the txn_opinfo needs to be cleaned up +- * as there's no unlock event triggered ++ * as there's no unlock event triggered. However if the originator node of ++ * this transaction is still running with a version lower than 60000, ++ * txn_opinfo can't be cleared as that'll lead to a race of referring op_ctx ++ * after it's being freed. + */ +- if (txn_op_info.skip_locking) ++ if (txn_op_info.skip_locking && priv->op_version >= GD_OP_VERSION_6_0) + ret = glusterd_clear_txn_opinfo(txn_id); + + if (rsp_dict) +-- +1.8.3.1 + diff --git a/SOURCES/0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch b/SOURCES/0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch new file mode 100644 index 0000000..2b23236 --- /dev/null +++ b/SOURCES/0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch @@ -0,0 +1,195 @@ +From b1a4947e382c5e2ba1137ed606ecffc69fcf00e9 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Tue, 25 Jun 2019 17:30:17 +0530 +Subject: [PATCH 205/221] glusterd: Can't run rebalance due to long unix socket + +Problem: glusterd populate unix socket file name based + on volname and if volname is lengthy socket + system call's are failed due to breach maximum length + is defined in the kernel. 
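+
+(Background, for illustration: a unix socket path must fit within
+sockaddr_un.sun_path, which on Linux is only 108 bytes —
+
+    struct sockaddr_un { sa_family_t sun_family; char sun_path[108]; };
+
+so a rebalance sockfile embedding a volume name of a hundred-odd
+characters, as in the regression test below, cannot fit.)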
+ +Solution:Convert unix socket name to hash to resolve the issue + +> Change-Id: I5072e8184013095587537dbfa4767286307fff65 +> fixes: bz#1720566 +> (Cherry pick from commit 2d7b77eb971700c1073db2b74f5877c1ae8293fc) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22869/) + +BUG: 1720192 +Change-Id: I5072e8184013095587537dbfa4767286307fff65 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174557 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/bugs/glusterd/bug-1720566.t | 50 ++++++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 38 +------------------- + xlators/mgmt/glusterd/src/glusterd.h | 23 +++++------- + 3 files changed, 59 insertions(+), 52 deletions(-) + create mode 100644 tests/bugs/glusterd/bug-1720566.t + +diff --git a/tests/bugs/glusterd/bug-1720566.t b/tests/bugs/glusterd/bug-1720566.t +new file mode 100644 +index 0000000..99bcf6f +--- /dev/null ++++ b/tests/bugs/glusterd/bug-1720566.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../cluster.rc ++. $(dirname $0)/../../volume.rc ++ ++ ++cleanup; ++V0="TestLongVolnamec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9abcd" ++V1="TestLongVolname3102bd28a16c49440bd5210e4ec4d5d93102bd28a16c49440bd5210e4ec4d5d933102bd28a16c49440bd5210e4ebbcd" ++TEST launch_cluster 2; ++TEST $CLI_1 peer probe $H2; ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count ++ ++$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 ++EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status'; ++$CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1 ++EXPECT 'Created' cluster_volinfo_field 1 $V1 'Status'; ++ ++$CLI_1 volume start $V0 ++EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status'; ++ ++$CLI_1 volume start $V1 ++EXPECT 'Started' cluster_volinfo_field 1 $V1 'Status'; ++ ++#Mount FUSE ++TEST glusterfs -s $H1 --volfile-id=$V0 $M0; ++ ++ ++#Mount FUSE ++TEST glusterfs -s $H1 --volfile-id=$V1 $M1; ++ ++TEST mkdir $M0/dir{1..4}; ++TEST touch $M0/dir{1..4}/files{1..4}; ++ ++TEST mkdir $M1/dir{1..4}; ++TEST touch $M1/dir{1..4}/files{1..4}; ++ ++TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}_1 $H2:$B2/${V0}_1 ++TEST $CLI_1 volume add-brick $V1 $H1:$B1/${V1}_1 $H2:$B2/${V1}_1 ++ ++ ++TEST $CLI_1 volume rebalance $V0 start ++TEST $CLI_1 volume rebalance $V1 start ++ ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V1 ++ ++cleanup; +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index cbed9a9..b419a89 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -266,18 +266,7 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + + if (dict_get_strn(this->options, "transport.socket.bind-address", + SLEN("transport.socket.bind-address"), +- &volfileserver) == 0) { +- /*In the case of running multiple glusterds on a single machine, +- *we should ensure that log file and unix socket file should be +- *unique in given cluster */ +- +- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(sockfile, volinfo, priv); +- snprintf(logfile, PATH_MAX, "%s/%s-%s-%s.log", +- DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname, +- (cmd == GF_DEFRAG_CMD_START_TIER ? 
"tier" : "rebalance"), +- uuid_utoa(MY_UUID)); +- +- } else { ++ &volfileserver) != 0) { + volfileserver = "localhost"; + } + +@@ -378,9 +367,6 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + glusterd_defrag_info_t *defrag = volinfo->rebal.defrag; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; +- struct stat buf = { +- 0, +- }; + + this = THIS; + GF_ASSERT(this); +@@ -396,28 +382,6 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + goto out; + + GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo); +- /* Check if defrag sockfile exists in the new location +- * in /var/run/ , if it does not try the old location +- */ +- ret = sys_stat(sockfile, &buf); +- /* TODO: Remove this once we don't need backward compatibility +- * with the older path +- */ +- if (ret && (errno == ENOENT)) { +- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +- "Rebalance sockfile " +- "%s does not exist. Trying old path.", +- sockfile); +- GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(sockfile, volinfo, priv); +- ret = sys_stat(sockfile, &buf); +- if (ret && (ENOENT == errno)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBAL_NO_SOCK_FILE, +- "Rebalance " +- "sockfile %s does not exist", +- sockfile); +- goto out; +- } +- } + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index f96bca3..7d07d33 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -910,30 +910,23 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args); + } \ + } while (0) + +-#define GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(path, volinfo, priv) \ +- do { \ +- char defrag_path[PATH_MAX]; \ +- int32_t _sockfile_old_len; \ +- GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \ +- _sockfile_old_len = snprintf(path, PATH_MAX, "%s/%s.sock", \ +- defrag_path, uuid_utoa(MY_UUID)); \ +- if ((_sockfile_old_len < 0) || (_sockfile_old_len >= PATH_MAX)) { \ +- path[0] = 0; \ +- } \ +- } while (0) +- + #define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) \ + do { \ + char operation[NAME_MAX]; \ ++ char tmppath[PATH_MAX] = { \ ++ 0, \ ++ }; \ + int32_t _defrag_sockfile_len; \ + GLUSTERD_GET_DEFRAG_PROCESS(operation, volinfo); \ + _defrag_sockfile_len = snprintf( \ +- path, UNIX_PATH_MAX, \ +- DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s.sock", operation, \ +- uuid_utoa(volinfo->volume_id)); \ ++ tmppath, PATH_MAX, \ ++ DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", operation, \ ++ volinfo->volname, uuid_utoa(MY_UUID)); \ + if ((_defrag_sockfile_len < 0) || \ + (_defrag_sockfile_len >= PATH_MAX)) { \ + path[0] = 0; \ ++ } else { \ ++ glusterd_set_socket_filepath(tmppath, path, sizeof(path)); \ + } \ + } while (0) + +-- +1.8.3.1 + diff --git a/SOURCES/0206-glusterd-ignore-user.-options-from-compatibility-che.patch b/SOURCES/0206-glusterd-ignore-user.-options-from-compatibility-che.patch new file mode 100644 index 0000000..8908097 --- /dev/null +++ b/SOURCES/0206-glusterd-ignore-user.-options-from-compatibility-che.patch @@ -0,0 +1,41 @@ +From f77d4a024cb9b17de7d5add064b34adfb0455d17 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 24 Jun 2019 18:32:52 +0530 +Subject: [PATCH 206/221] glusterd: ignore user.* options from compatibility + check in brick mux + +user.* options are just custom and they don't contribute anything in +terms of determining the volume compatibility in 
brick multiplexing + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22933/ + +>Fixes: bz#1723402 +>Change-Id: Ic7e0181ab72993d29cab345cde64ae1340bf4faf +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1722509 +Change-Id: Ic7e0181ab72993d29cab345cde64ae1340bf4faf +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174589 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 7768b8e..c6e9bb0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2425,6 +2425,9 @@ unsafe_option(dict_t *this, char *key, data_t *value, void *arg) + if (fnmatch("*diagnostics.client-log*", key, 0) == 0) { + return _gf_false; + } ++ if (fnmatch("user.*", key, 0) == 0) { ++ return _gf_false; ++ } + + return _gf_true; + } +-- +1.8.3.1 + diff --git a/SOURCES/0207-glusterd-fix-use-after-free-of-a-dict_t.patch b/SOURCES/0207-glusterd-fix-use-after-free-of-a-dict_t.patch new file mode 100644 index 0000000..5a92d58 --- /dev/null +++ b/SOURCES/0207-glusterd-fix-use-after-free-of-a-dict_t.patch @@ -0,0 +1,44 @@ +From a7a7d497af4230430f8a0cc54d8b49cfea260039 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 25 Jun 2019 18:00:06 +0200 +Subject: [PATCH 207/221] glusterd: fix use-after-free of a dict_t + +A dict was passed to a function that calls dict_unref() without taking +any additional reference. Given that the same dict is also used after +the function returns, this was causing a use-after-free situation. + +To fix the issue, we simply take an additional reference before calling +the function. 
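+
+A minimal sketch of the idiom, using the names from the hunk below: when a
+callee consumes a dict reference via dict_unref(), a caller that continues
+to use the dict afterwards must take its own reference up front —
+
+    dict_arr[totthread++] = dict_ref(peer_data); /* ref consumed later */
+    ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length);
+    /* peer_data remains valid here because of the extra reference */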
+ +Upstream patch: +> BUG: 1723890 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22943 +> Change-Id: I98c6b76b08fe3fa6224edf281a26e9ba1ffe3017 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: I98c6b76b08fe3fa6224edf281a26e9ba1ffe3017 +Updates: bz#1722801 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174656 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index c6e9bb0..4c487d0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3697,7 +3697,7 @@ glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, + if (totthread) { + gf_log(this->name, GF_LOG_INFO, + "Finished merger of all dictionraies into single one"); +- dict_arr[totthread++] = peer_data; ++ dict_arr[totthread++] = dict_ref(peer_data); + ret = glusterd_dict_arr_serialize(dict_arr, totthread, buf, length); + gf_log(this->name, GF_LOG_INFO, + "Serialize dictionary data return is %d", ret); +-- +1.8.3.1 + diff --git a/SOURCES/0208-mem-pool-remove-dead-code.patch b/SOURCES/0208-mem-pool-remove-dead-code.patch new file mode 100644 index 0000000..c8678f2 --- /dev/null +++ b/SOURCES/0208-mem-pool-remove-dead-code.patch @@ -0,0 +1,161 @@ +From d7ddc1cd3af86198ffca2d1958871d4c2c04bd9e Mon Sep 17 00:00:00 2001 +From: Yaniv Kaul <ykaul@redhat.com> +Date: Thu, 21 Mar 2019 19:51:30 +0200 +Subject: [PATCH 208/221] mem-pool: remove dead code. + +Upstream patch: +> Change-Id: I3bbda719027b45e1289db2e6a718627141bcbdc8 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22394 +> BUG: 1193929 +> Signed-off-by: Yaniv Kaul <ykaul@redhat.com> + +Updates: bz#1722801 +Change-Id: I3bbda719027b45e1289db2e6a718627141bcbdc8 +Signed-off-by: Yaniv Kaul <ykaul@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174710 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/glusterfs/mem-pool.h | 11 ------ + libglusterfs/src/mem-pool.c | 70 ----------------------------------- + 2 files changed, 81 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index 90905fb..0250b59 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -308,15 +308,4 @@ mem_pool_destroy(struct mem_pool *pool); + void + gf_mem_acct_enable_set(void *ctx); + +-/* hit will be set to : +- * _gf_true if the memory is served from mem pool +- * _gf_false if the requested size was not present in mem pool and hence +- * std alloc'd. 
+- */ +-void * +-mem_pool_get(unsigned long sizeof_type, gf_boolean_t *hit); +- +-void * +-mem_pool_get0(unsigned long sizeof_type, gf_boolean_t *hit); +- + #endif /* _MEM_POOL_H */ +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 3934a78..9b4ea52 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -365,10 +365,6 @@ static size_t pool_list_size; + #define N_COLD_LISTS 1024 + #define POOL_SWEEP_SECS 30 + +-static unsigned long sweep_times; +-static unsigned long sweep_usecs; +-static unsigned long frees_to_system; +- + typedef struct { + struct list_head death_row; + pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; +@@ -426,7 +422,6 @@ free_obj_list(pooled_obj_hdr_t *victim) + next = victim->next; + free(victim); + victim = next; +- ++frees_to_system; + } + } + +@@ -438,9 +433,6 @@ pool_sweeper(void *arg) + per_thread_pool_list_t *next_pl; + per_thread_pool_t *pt_pool; + unsigned int i; +- struct timeval begin_time; +- struct timeval end_time; +- struct timeval elapsed; + gf_boolean_t poisoned; + + /* +@@ -457,7 +449,6 @@ pool_sweeper(void *arg) + state.n_cold_lists = 0; + + /* First pass: collect stuff that needs our attention. */ +- (void)gettimeofday(&begin_time, NULL); + (void)pthread_mutex_lock(&pool_lock); + list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) + { +@@ -470,10 +461,6 @@ pool_sweeper(void *arg) + } + } + (void)pthread_mutex_unlock(&pool_lock); +- (void)gettimeofday(&end_time, NULL); +- timersub(&end_time, &begin_time, &elapsed); +- sweep_usecs += elapsed.tv_sec * 1000000 + elapsed.tv_usec; +- sweep_times += 1; + + /* Second pass: free dead pools. */ + (void)pthread_mutex_lock(&pool_free_lock); +@@ -879,63 +866,6 @@ mem_get(struct mem_pool *mem_pool) + #endif /* GF_DISABLE_MEMPOOL */ + } + +-void * +-mem_pool_get(unsigned long sizeof_type, gf_boolean_t *hit) +-{ +-#if defined(GF_DISABLE_MEMPOOL) +- return GF_MALLOC(sizeof_type, gf_common_mt_mem_pool); +-#else +- pooled_obj_hdr_t *retval; +- unsigned int power; +- struct mem_pool_shared *pool = NULL; +- +- if (!sizeof_type) { +- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +- "invalid argument"); +- return NULL; +- } +- +- /* We ensure sizeof_type > 1 and the next power of two will be, at least, +- * 2^POOL_SMALLEST */ +- sizeof_type |= (1 << POOL_SMALLEST) - 1; +- power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1; +- if (power > POOL_LARGEST) { +- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +- "invalid argument"); +- return NULL; +- } +- pool = &pools[power - POOL_SMALLEST]; +- +- retval = mem_get_from_pool(NULL, pool, hit); +- +- return retval + 1; +-#endif /* GF_DISABLE_MEMPOOL */ +-} +- +-void * +-mem_pool_get0(unsigned long sizeof_type, gf_boolean_t *hit) +-{ +- void *ptr = NULL; +- unsigned int power; +- struct mem_pool_shared *pool = NULL; +- +- ptr = mem_pool_get(sizeof_type, hit); +- if (ptr) { +-#if defined(GF_DISABLE_MEMPOOL) +- memset(ptr, 0, sizeof_type); +-#else +- /* We ensure sizeof_type > 1 and the next power of two will be, at +- * least, 2^POOL_SMALLEST */ +- sizeof_type |= (1 << POOL_SMALLEST) - 1; +- power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1; +- pool = &pools[power - POOL_SMALLEST]; +- memset(ptr, 0, AVAILABLE_SIZE(pool->power_of_two)); +-#endif +- } +- +- return ptr; +-} +- + void + mem_put(void *ptr) + { +-- +1.8.3.1 + diff --git a/SOURCES/0209-core-avoid-dynamic-TLS-allocation-when-possible.patch 
b/SOURCES/0209-core-avoid-dynamic-TLS-allocation-when-possible.patch new file mode 100644 index 0000000..f46f1b6 --- /dev/null +++ b/SOURCES/0209-core-avoid-dynamic-TLS-allocation-when-possible.patch @@ -0,0 +1,1059 @@ +From 2f5969a77493814e242e6bac3c6bf7acf3202e0f Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 5 Mar 2019 18:58:20 +0100 +Subject: [PATCH 209/221] core: avoid dynamic TLS allocation when possible + +Some interdependencies between logging and memory management functions +make it impossible to use the logging framework before initializing +memory subsystem because they both depend on Thread Local Storage +allocated through pthread_key_create() during initialization. + +This causes a crash when we try to log something very early in the +initialization phase. + +To prevent this, several dynamically allocated TLS structures have +been replaced by static TLS reserved at compile time using '__thread' +keyword. This also reduces the number of error sources, making +initialization simpler. + +Upstream patch: +> BUG: 1193929 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22302 +> Change-Id: I8ea2e072411e30790d50084b6b7e909c7bb01d50 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: I8ea2e072411e30790d50084b6b7e909c7bb01d50 +Updates: bz#1722801 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174711 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + api/src/glfs.c | 3 +- + cli/src/cli.c | 3 +- + glusterfsd/src/glusterfsd.c | 4 +- + libglusterfs/src/globals.c | 289 ++++----------------- + libglusterfs/src/glusterfs/globals.h | 6 +- + libglusterfs/src/glusterfs/mem-pool.h | 7 +- + libglusterfs/src/libglusterfs.sym | 3 +- + libglusterfs/src/mem-pool.c | 98 +++---- + libglusterfs/src/syncop.c | 133 ++-------- + .../changelog/lib/src/gf-changelog-helpers.c | 51 +--- + xlators/features/changelog/lib/src/gf-changelog.c | 3 +- + xlators/nfs/server/src/mount3udp_svc.c | 6 +- + 12 files changed, 114 insertions(+), 492 deletions(-) + +diff --git a/api/src/glfs.c b/api/src/glfs.c +index 6bbb620..f36616d 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -829,8 +829,7 @@ pub_glfs_new(const char *volname) + * Do this as soon as possible in case something else depends on + * pool allocations. + */ +- mem_pools_init_early(); +- mem_pools_init_late(); ++ mem_pools_init(); + + fs = glfs_new_fs(volname); + if (!fs) +diff --git a/cli/src/cli.c b/cli/src/cli.c +index ff39a98..99a16a0 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -795,8 +795,7 @@ main(int argc, char *argv[]) + int ret = -1; + glusterfs_ctx_t *ctx = NULL; + +- mem_pools_init_early(); +- mem_pools_init_late(); ++ mem_pools_init(); + + ctx = glusterfs_ctx_new(); + if (!ctx) +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 6aee4c1..2172af4 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2722,8 +2722,6 @@ main(int argc, char *argv[]) + }; + cmd_args_t *cmd = NULL; + +- mem_pools_init_early(); +- + gf_check_and_set_mem_acct(argc, argv); + + ctx = glusterfs_ctx_new(); +@@ -2838,7 +2836,7 @@ main(int argc, char *argv[]) + * the parent, but we want to do it as soon as possible after that in + * case something else depends on pool allocations. 
+ */ +- mem_pools_init_late(); ++ mem_pools_init(); + + #ifdef GF_LINUX_HOST_OS + ret = set_oom_score_adj(ctx); +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index 4fec063..02098e6 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -99,16 +99,19 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { + glusterfs_ctx_t *global_ctx = NULL; + pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; + xlator_t global_xlator; +-static pthread_key_t this_xlator_key; +-static pthread_key_t synctask_key; +-static pthread_key_t uuid_buf_key; +-static char global_uuid_buf[GF_UUID_BUF_SIZE]; +-static pthread_key_t lkowner_buf_key; +-static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE]; +-static pthread_key_t leaseid_buf_key; + static int gf_global_mem_acct_enable = 1; + static pthread_once_t globals_inited = PTHREAD_ONCE_INIT; + ++static pthread_key_t free_key; ++ ++static __thread xlator_t *thread_xlator = NULL; ++static __thread void *thread_synctask = NULL; ++static __thread void *thread_leaseid = NULL; ++static __thread struct syncopctx thread_syncopctx = {}; ++static __thread char thread_uuid_buf[GF_UUID_BUF_SIZE] = {}; ++static __thread char thread_lkowner_buf[GF_LKOWNER_BUF_SIZE] = {}; ++static __thread char thread_leaseid_buf[GF_LEASE_ID_BUF_SIZE] = {}; ++ + int + gf_global_mem_acct_enable_get(void) + { +@@ -122,12 +125,6 @@ gf_global_mem_acct_enable_set(int val) + return 0; + } + +-void +-glusterfs_this_destroy(void *ptr) +-{ +- FREE(ptr); +-} +- + static struct xlator_cbks global_cbks = { + .forget = NULL, + .release = NULL, +@@ -212,18 +209,9 @@ struct volume_options global_xl_options[] = { + + static volume_opt_list_t global_xl_opt_list; + +-int ++void + glusterfs_this_init() + { +- int ret = 0; +- ret = pthread_key_create(&this_xlator_key, glusterfs_this_destroy); +- if (ret != 0) { +- gf_msg("", GF_LOG_WARNING, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED, +- "failed to create " +- "the pthread key"); +- return ret; +- } +- + global_xlator.name = "glusterfs"; + global_xlator.type = GF_GLOBAL_XLATOR_NAME; + global_xlator.cbks = &global_cbks; +@@ -237,301 +225,120 @@ glusterfs_this_init() + global_xl_opt_list.given_opt = global_xl_options; + + list_add_tail(&global_xl_opt_list.list, &global_xlator.volume_options); +- +- return ret; + } + + xlator_t ** + __glusterfs_this_location() + { +- xlator_t **this_location = NULL; +- int ret = 0; +- +- this_location = pthread_getspecific(this_xlator_key); +- +- if (!this_location) { +- this_location = CALLOC(1, sizeof(*this_location)); +- if (!this_location) +- goto out; ++ xlator_t **this_location; + +- ret = pthread_setspecific(this_xlator_key, this_location); +- if (ret != 0) { +- FREE(this_location); +- this_location = NULL; +- goto out; +- } +- } +-out: +- if (this_location) { +- if (!*this_location) +- *this_location = &global_xlator; ++ this_location = &thread_xlator; ++ if (*this_location == NULL) { ++ thread_xlator = &global_xlator; + } ++ + return this_location; + } + + xlator_t * + glusterfs_this_get() + { +- xlator_t **this_location = NULL; +- +- this_location = __glusterfs_this_location(); +- if (!this_location) +- return &global_xlator; +- +- return *this_location; ++ return *__glusterfs_this_location(); + } + +-int ++void + glusterfs_this_set(xlator_t *this) + { +- xlator_t **this_location = NULL; +- +- this_location = __glusterfs_this_location(); +- if (!this_location) +- return -ENOMEM; +- +- *this_location = this; +- +- return 0; ++ thread_xlator = this; + } + + /* SYNCOPCTX */ 
+-static pthread_key_t syncopctx_key; +- +-static void +-syncopctx_key_destroy(void *ptr) +-{ +- struct syncopctx *opctx = ptr; +- +- if (opctx) { +- if (opctx->groups) +- GF_FREE(opctx->groups); +- +- GF_FREE(opctx); +- } +- +- return; +-} + + void * + syncopctx_getctx() + { +- void *opctx = NULL; +- +- opctx = pthread_getspecific(syncopctx_key); +- +- return opctx; +-} +- +-int +-syncopctx_setctx(void *ctx) +-{ +- int ret = 0; +- +- ret = pthread_setspecific(syncopctx_key, ctx); +- +- return ret; +-} +- +-static int +-syncopctx_init(void) +-{ +- int ret; +- +- ret = pthread_key_create(&syncopctx_key, syncopctx_key_destroy); +- +- return ret; ++ return &thread_syncopctx; + } + + /* SYNCTASK */ + +-int +-synctask_init() +-{ +- int ret = 0; +- +- ret = pthread_key_create(&synctask_key, NULL); +- +- return ret; +-} +- + void * + synctask_get() + { +- void *synctask = NULL; +- +- synctask = pthread_getspecific(synctask_key); +- +- return synctask; ++ return thread_synctask; + } + +-int ++void + synctask_set(void *synctask) + { +- int ret = 0; +- +- pthread_setspecific(synctask_key, synctask); +- +- return ret; ++ thread_synctask = synctask; + } + + // UUID_BUFFER + +-void +-glusterfs_uuid_buf_destroy(void *ptr) +-{ +- FREE(ptr); +-} +- +-int +-glusterfs_uuid_buf_init() +-{ +- int ret = 0; +- +- ret = pthread_key_create(&uuid_buf_key, glusterfs_uuid_buf_destroy); +- return ret; +-} +- + char * + glusterfs_uuid_buf_get() + { +- char *buf; +- int ret = 0; +- +- buf = pthread_getspecific(uuid_buf_key); +- if (!buf) { +- buf = MALLOC(GF_UUID_BUF_SIZE); +- ret = pthread_setspecific(uuid_buf_key, (void *)buf); +- if (ret) +- buf = global_uuid_buf; +- } +- return buf; ++ return thread_uuid_buf; + } + + /* LKOWNER_BUFFER */ + +-void +-glusterfs_lkowner_buf_destroy(void *ptr) +-{ +- FREE(ptr); +-} +- +-int +-glusterfs_lkowner_buf_init() +-{ +- int ret = 0; +- +- ret = pthread_key_create(&lkowner_buf_key, glusterfs_lkowner_buf_destroy); +- return ret; +-} +- + char * + glusterfs_lkowner_buf_get() + { +- char *buf; +- int ret = 0; +- +- buf = pthread_getspecific(lkowner_buf_key); +- if (!buf) { +- buf = MALLOC(GF_LKOWNER_BUF_SIZE); +- ret = pthread_setspecific(lkowner_buf_key, (void *)buf); +- if (ret) +- buf = global_lkowner_buf; +- } +- return buf; ++ return thread_lkowner_buf; + } + + /* Leaseid buffer */ +-void +-glusterfs_leaseid_buf_destroy(void *ptr) +-{ +- FREE(ptr); +-} +- +-int +-glusterfs_leaseid_buf_init() +-{ +- int ret = 0; +- +- ret = pthread_key_create(&leaseid_buf_key, glusterfs_leaseid_buf_destroy); +- return ret; +-} + + char * + glusterfs_leaseid_buf_get() + { + char *buf = NULL; +- int ret = 0; + +- buf = pthread_getspecific(leaseid_buf_key); +- if (!buf) { +- buf = CALLOC(1, GF_LEASE_ID_BUF_SIZE); +- ret = pthread_setspecific(leaseid_buf_key, (void *)buf); +- if (ret) { +- FREE(buf); +- buf = NULL; +- } ++ buf = thread_leaseid; ++ if (buf == NULL) { ++ buf = thread_leaseid_buf; ++ thread_leaseid = buf; + } ++ + return buf; + } + + char * + glusterfs_leaseid_exist() + { +- return pthread_getspecific(leaseid_buf_key); ++ return thread_leaseid; + } + + static void +-gf_globals_init_once() ++glusterfs_cleanup(void *ptr) + { +- int ret = 0; +- +- ret = glusterfs_this_init(); +- if (ret) { +- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_TRANSLATOR_INIT_FAILED, +- "ERROR: glusterfs-translator init failed"); +- goto out; +- } +- +- ret = glusterfs_uuid_buf_init(); +- if (ret) { +- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UUID_BUF_INIT_FAILED, +- "ERROR: glusterfs uuid buffer init failed"); +- goto 
out; ++ if (thread_syncopctx.groups != NULL) { ++ GF_FREE(thread_syncopctx.groups); + } + +- ret = glusterfs_lkowner_buf_init(); +- if (ret) { +- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LKOWNER_BUF_INIT_FAILED, +- "ERROR: glusterfs lkowner buffer init failed"); +- goto out; +- } ++ mem_pool_thread_destructor(); ++} + +- ret = glusterfs_leaseid_buf_init(); +- if (ret) { +- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_LEASEID_BUF_INIT_FAILED, +- "ERROR: glusterfs leaseid buffer init failed"); +- goto out; +- } ++static void ++gf_globals_init_once() ++{ ++ int ret = 0; + +- ret = synctask_init(); +- if (ret) { +- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCTASK_INIT_FAILED, +- "ERROR: glusterfs synctask init failed"); +- goto out; +- } ++ glusterfs_this_init(); + +- ret = syncopctx_init(); +- if (ret) { +- gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_SYNCOPCTX_INIT_FAILED, +- "ERROR: glusterfs syncopctx init failed"); +- goto out; +- } +-out: ++ /* This is needed only to cleanup the potential allocation of ++ * thread_syncopctx.groups. */ ++ ret = pthread_key_create(&free_key, glusterfs_cleanup); ++ if (ret != 0) { ++ gf_msg("", GF_LOG_ERROR, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED, ++ "failed to create the pthread key"); + +- if (ret) { + gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_GLOBAL_INIT_FAILED, + "Exiting as global initialization failed"); ++ + exit(ret); + } + } +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index e45db14..55476f6 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -147,7 +147,7 @@ xlator_t ** + __glusterfs_this_location(void); + xlator_t * + glusterfs_this_get(void); +-int ++void + glusterfs_this_set(xlator_t *); + + extern xlator_t global_xlator; +@@ -156,13 +156,11 @@ extern struct volume_options global_xl_options[]; + /* syncopctx */ + void * + syncopctx_getctx(void); +-int +-syncopctx_setctx(void *ctx); + + /* task */ + void * + synctask_get(void); +-int ++void + synctask_set(void *); + + /* uuid_buf */ +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index 0250b59..c5a486b 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -279,9 +279,7 @@ struct mem_pool_shared { + }; + + void +-mem_pools_init_early(void); /* basic initialization of memory pools */ +-void +-mem_pools_init_late(void); /* start the pool_sweeper thread */ ++mem_pools_init(void); /* start the pool_sweeper thread */ + void + mem_pools_fini(void); /* cleanup memory pools */ + +@@ -306,6 +304,9 @@ void + mem_pool_destroy(struct mem_pool *pool); + + void ++mem_pool_thread_destructor(void); ++ ++void + gf_mem_acct_enable_set(void *ctx); + + #endif /* _MEM_POOL_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 7a2edef..86215d2 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -872,8 +872,7 @@ mem_get0 + mem_pool_destroy + mem_pool_new_fn + mem_pools_fini +-mem_pools_init_early +-mem_pools_init_late ++mem_pools_init + mem_put + mkdir_p + next_token +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 9b4ea52..ab78804 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -353,7 +353,6 @@ free: + FREE(ptr); + } + +-static pthread_key_t pool_key; + static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER; + static struct list_head pool_threads; + static pthread_mutex_t pool_free_lock = 
PTHREAD_MUTEX_INITIALIZER; +@@ -361,6 +360,8 @@ static struct list_head pool_free_threads; + static struct mem_pool_shared pools[NPOOLS]; + static size_t pool_list_size; + ++static __thread per_thread_pool_list_t *thread_pool_list = NULL; ++ + #if !defined(GF_DISABLE_MEMPOOL) + #define N_COLD_LISTS 1024 + #define POOL_SWEEP_SECS 30 +@@ -373,7 +374,6 @@ typedef struct { + + enum init_state { + GF_MEMPOOL_INIT_NONE = 0, +- GF_MEMPOOL_INIT_PREINIT, + GF_MEMPOOL_INIT_EARLY, + GF_MEMPOOL_INIT_LATE, + GF_MEMPOOL_INIT_DESTROY +@@ -486,9 +486,9 @@ pool_sweeper(void *arg) + } + + void +-pool_destructor(void *arg) ++mem_pool_thread_destructor(void) + { +- per_thread_pool_list_t *pool_list = arg; ++ per_thread_pool_list_t *pool_list = thread_pool_list; + + /* The pool-sweeper thread will take it from here. + * +@@ -499,7 +499,10 @@ pool_destructor(void *arg) + * This change can modify what mem_put() does, but both possibilities are + * fine until the sweeper thread kicks in. The real synchronization must be + * between mem_put() and the sweeper thread. */ +- pool_list->poison = 1; ++ if (pool_list != NULL) { ++ pool_list->poison = 1; ++ thread_pool_list = NULL; ++ } + } + + static __attribute__((constructor)) void +@@ -522,46 +525,14 @@ mem_pools_preinit(void) + pool_list_size = sizeof(per_thread_pool_list_t) + + sizeof(per_thread_pool_t) * (NPOOLS - 1); + +- init_done = GF_MEMPOOL_INIT_PREINIT; ++ init_done = GF_MEMPOOL_INIT_EARLY; + } + +-/* Use mem_pools_init_early() function for basic initialization. There will be +- * no cleanup done by the pool_sweeper thread until mem_pools_init_late() has +- * been called. Calling mem_get() will be possible after this function has +- * setup the basic structures. */ ++/* Call mem_pools_init() once threading has been configured completely. This ++ * prevent the pool_sweeper thread from getting killed once the main() thread ++ * exits during deamonizing. */ + void +-mem_pools_init_early(void) +-{ +- pthread_mutex_lock(&init_mutex); +- /* Use a pthread_key destructor to clean up when a thread exits. +- * +- * We won't increase init_count here, that is only done when the +- * pool_sweeper thread is started too. +- */ +- if (init_done == GF_MEMPOOL_INIT_PREINIT || +- init_done == GF_MEMPOOL_INIT_DESTROY) { +- /* key has not been created yet */ +- if (pthread_key_create(&pool_key, pool_destructor) != 0) { +- gf_log("mem-pool", GF_LOG_CRITICAL, +- "failed to initialize mem-pool key"); +- } +- +- init_done = GF_MEMPOOL_INIT_EARLY; +- } else { +- gf_log("mem-pool", GF_LOG_CRITICAL, +- "incorrect order of mem-pool initialization " +- "(init_done=%d)", +- init_done); +- } +- +- pthread_mutex_unlock(&init_mutex); +-} +- +-/* Call mem_pools_init_late() once threading has been configured completely. +- * This prevent the pool_sweeper thread from getting killed once the main() +- * thread exits during deamonizing. */ +-void +-mem_pools_init_late(void) ++mem_pools_init(void) + { + pthread_mutex_lock(&init_mutex); + if ((init_count++) == 0) { +@@ -580,13 +551,12 @@ mem_pools_fini(void) + switch (init_count) { + case 0: + /* +- * If init_count is already zero (as e.g. if somebody called +- * this before mem_pools_init_late) then the sweeper was +- * probably never even started so we don't need to stop it. +- * Even if there's some crazy circumstance where there is a +- * sweeper but init_count is still zero, that just means we'll +- * leave it running. Not perfect, but far better than any +- * known alternative. ++ * If init_count is already zero (as e.g. 
if somebody called this ++ * before mem_pools_init) then the sweeper was probably never even ++ * started so we don't need to stop it. Even if there's some crazy ++ * circumstance where there is a sweeper but init_count is still ++ * zero, that just means we'll leave it running. Not perfect, but ++ * far better than any known alternative. + */ + break; + case 1: { +@@ -594,20 +564,17 @@ mem_pools_fini(void) + per_thread_pool_list_t *next_pl; + unsigned int i; + +- /* if only mem_pools_init_early() was called, sweeper_tid will +- * be invalid and the functions will error out. That is not +- * critical. In all other cases, the sweeper_tid will be valid +- * and the thread gets stopped. */ ++ /* if mem_pools_init() was not called, sweeper_tid will be invalid ++ * and the functions will error out. That is not critical. In all ++ * other cases, the sweeper_tid will be valid and the thread gets ++ * stopped. */ + (void)pthread_cancel(sweeper_tid); + (void)pthread_join(sweeper_tid, NULL); + +- /* Need to clean the pool_key to prevent further usage of the +- * per_thread_pool_list_t structure that is stored for each +- * thread. +- * This also prevents calling pool_destructor() when a thread +- * exits, so there is no chance on a use-after-free of the +- * per_thread_pool_list_t structure. */ +- (void)pthread_key_delete(pool_key); ++ /* At this point all threads should have already terminated, so ++ * it should be safe to destroy all pending per_thread_pool_list_t ++ * structures that are stored for each thread. */ ++ mem_pool_thread_destructor(); + + /* free all objects from all pools */ + list_for_each_entry_safe(pool_list, next_pl, &pool_threads, +@@ -642,11 +609,7 @@ mem_pools_fini(void) + + #else + void +-mem_pools_init_early(void) +-{ +-} +-void +-mem_pools_init_late(void) ++mem_pools_init(void) + { + } + void +@@ -734,7 +697,7 @@ mem_get_pool_list(void) + per_thread_pool_list_t *pool_list; + unsigned int i; + +- pool_list = pthread_getspecific(pool_key); ++ pool_list = thread_pool_list; + if (pool_list) { + return pool_list; + } +@@ -767,7 +730,8 @@ mem_get_pool_list(void) + list_add(&pool_list->thr_list, &pool_threads); + (void)pthread_mutex_unlock(&pool_lock); + +- (void)pthread_setspecific(pool_key, pool_list); ++ thread_pool_list = pool_list; ++ + return pool_list; + } + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index c05939a..2eb7b49 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -26,28 +26,10 @@ syncopctx_setfsuid(void *uid) + + opctx = syncopctx_getctx(); + +- /* alloc for this thread the first time */ +- if (!opctx) { +- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); +- if (!opctx) { +- ret = -1; +- goto out; +- } +- +- ret = syncopctx_setctx(opctx); +- if (ret != 0) { +- GF_FREE(opctx); +- opctx = NULL; +- goto out; +- } +- } ++ opctx->uid = *(uid_t *)uid; ++ opctx->valid |= SYNCOPCTX_UID; + + out: +- if (opctx && uid) { +- opctx->uid = *(uid_t *)uid; +- opctx->valid |= SYNCOPCTX_UID; +- } +- + return ret; + } + +@@ -66,28 +48,10 @@ syncopctx_setfsgid(void *gid) + + opctx = syncopctx_getctx(); + +- /* alloc for this thread the first time */ +- if (!opctx) { +- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); +- if (!opctx) { +- ret = -1; +- goto out; +- } +- +- ret = syncopctx_setctx(opctx); +- if (ret != 0) { +- GF_FREE(opctx); +- opctx = NULL; +- goto out; +- } +- } ++ opctx->gid = *(gid_t *)gid; ++ opctx->valid |= SYNCOPCTX_GID; + + out: +- if (opctx && gid) { +- opctx->gid = *(gid_t *)gid; +- 
opctx->valid |= SYNCOPCTX_GID; +- } +- + return ret; + } + +@@ -107,43 +71,20 @@ syncopctx_setfsgroups(int count, const void *groups) + + opctx = syncopctx_getctx(); + +- /* alloc for this thread the first time */ +- if (!opctx) { +- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); +- if (!opctx) { +- ret = -1; +- goto out; +- } +- +- ret = syncopctx_setctx(opctx); +- if (ret != 0) { +- GF_FREE(opctx); +- opctx = NULL; +- goto out; +- } +- } +- + /* resize internal groups as required */ + if (count && opctx->grpsize < count) { + if (opctx->groups) { +- tmpgroups = GF_REALLOC(opctx->groups, (sizeof(gid_t) * count)); +- /* NOTE: Not really required to zero the reallocation, +- * as ngrps controls the validity of data, +- * making a note irrespective */ +- if (tmpgroups == NULL) { +- opctx->grpsize = 0; +- GF_FREE(opctx->groups); +- opctx->groups = NULL; +- ret = -1; +- goto out; +- } +- } else { +- tmpgroups = GF_CALLOC(count, sizeof(gid_t), gf_common_mt_syncopctx); +- if (tmpgroups == NULL) { +- opctx->grpsize = 0; +- ret = -1; +- goto out; +- } ++ /* Group list will be updated later, so no need to keep current ++ * data and waste time copying it. It's better to free the current ++ * allocation and then allocate a fresh new memory block. */ ++ GF_FREE(opctx->groups); ++ opctx->groups = NULL; ++ opctx->grpsize = 0; ++ } ++ tmpgroups = GF_MALLOC(count * sizeof(gid_t), gf_common_mt_syncopctx); ++ if (tmpgroups == NULL) { ++ ret = -1; ++ goto out; + } + + opctx->groups = tmpgroups; +@@ -177,28 +118,10 @@ syncopctx_setfspid(void *pid) + + opctx = syncopctx_getctx(); + +- /* alloc for this thread the first time */ +- if (!opctx) { +- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); +- if (!opctx) { +- ret = -1; +- goto out; +- } +- +- ret = syncopctx_setctx(opctx); +- if (ret != 0) { +- GF_FREE(opctx); +- opctx = NULL; +- goto out; +- } +- } ++ opctx->pid = *(pid_t *)pid; ++ opctx->valid |= SYNCOPCTX_PID; + + out: +- if (opctx && pid) { +- opctx->pid = *(pid_t *)pid; +- opctx->valid |= SYNCOPCTX_PID; +- } +- + return ret; + } + +@@ -217,28 +140,10 @@ syncopctx_setfslkowner(gf_lkowner_t *lk_owner) + + opctx = syncopctx_getctx(); + +- /* alloc for this thread the first time */ +- if (!opctx) { +- opctx = GF_CALLOC(1, sizeof(*opctx), gf_common_mt_syncopctx); +- if (!opctx) { +- ret = -1; +- goto out; +- } +- +- ret = syncopctx_setctx(opctx); +- if (ret != 0) { +- GF_FREE(opctx); +- opctx = NULL; +- goto out; +- } +- } ++ opctx->lk_owner = *lk_owner; ++ opctx->valid |= SYNCOPCTX_LKOWNER; + + out: +- if (opctx && lk_owner) { +- opctx->lk_owner = *lk_owner; +- opctx->valid |= SYNCOPCTX_LKOWNER; +- } +- + return ret; + } + +diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c +index 03dac5e..e5a9db4 100644 +--- a/xlators/features/changelog/lib/src/gf-changelog-helpers.c ++++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c +@@ -64,20 +64,7 @@ gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr) + * made a part of libglusterfs. 
+ */ + +-static pthread_key_t rl_key; +-static pthread_once_t rl_once = PTHREAD_ONCE_INIT; +- +-static void +-readline_destructor(void *ptr) +-{ +- GF_FREE(ptr); +-} +- +-static void +-readline_once(void) +-{ +- pthread_key_create(&rl_key, readline_destructor); +-} ++static __thread read_line_t thread_tsd = {}; + + static ssize_t + my_read(read_line_t *tsd, int fd, char *ptr) +@@ -97,27 +84,6 @@ my_read(read_line_t *tsd, int fd, char *ptr) + return 1; + } + +-static int +-gf_readline_init_once(read_line_t **tsd) +-{ +- if (pthread_once(&rl_once, readline_once) != 0) +- return -1; +- +- *tsd = pthread_getspecific(rl_key); +- if (*tsd) +- goto out; +- +- *tsd = GF_CALLOC(1, sizeof(**tsd), gf_changelog_mt_libgfchangelog_rl_t); +- if (!*tsd) +- return -1; +- +- if (pthread_setspecific(rl_key, *tsd) != 0) +- return -1; +- +-out: +- return 0; +-} +- + ssize_t + gf_readline(int fd, void *vptr, size_t maxlen) + { +@@ -125,10 +91,7 @@ gf_readline(int fd, void *vptr, size_t maxlen) + size_t rc = 0; + char c = ' '; + char *ptr = NULL; +- read_line_t *tsd = NULL; +- +- if (gf_readline_init_once(&tsd)) +- return -1; ++ read_line_t *tsd = &thread_tsd; + + ptr = vptr; + for (n = 1; n < maxlen; n++) { +@@ -151,10 +114,7 @@ off_t + gf_lseek(int fd, off_t offset, int whence) + { + off_t off = 0; +- read_line_t *tsd = NULL; +- +- if (gf_readline_init_once(&tsd)) +- return -1; ++ read_line_t *tsd = &thread_tsd; + + off = sys_lseek(fd, offset, whence); + if (off == -1) +@@ -169,10 +129,7 @@ gf_lseek(int fd, off_t offset, int whence) + int + gf_ftruncate(int fd, off_t length) + { +- read_line_t *tsd = NULL; +- +- if (gf_readline_init_once(&tsd)) +- return -1; ++ read_line_t *tsd = &thread_tsd; + + if (sys_ftruncate(fd, 0)) + return -1; +diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c +index 7ed9e55..d6acb37 100644 +--- a/xlators/features/changelog/lib/src/gf-changelog.c ++++ b/xlators/features/changelog/lib/src/gf-changelog.c +@@ -237,9 +237,8 @@ gf_changelog_init_master() + { + int ret = 0; + +- mem_pools_init_early(); + ret = gf_changelog_init_context(); +- mem_pools_init_late(); ++ mem_pools_init(); + + return ret; + } +diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c +index d5e4169..0688779eb 100644 +--- a/xlators/nfs/server/src/mount3udp_svc.c ++++ b/xlators/nfs/server/src/mount3udp_svc.c +@@ -216,11 +216,7 @@ mount3udp_thread(void *argv) + + GF_ASSERT(nfsx); + +- if (glusterfs_this_set(nfsx)) { +- gf_msg(GF_MNT, GF_LOG_ERROR, ENOMEM, NFS_MSG_XLATOR_SET_FAIL, +- "Failed to set xlator, nfs.mount-udp will not work"); +- return NULL; +- } ++ glusterfs_this_set(nfsx); + + transp = svcudp_create(RPC_ANYSOCK); + if (transp == NULL) { +-- +1.8.3.1 + diff --git a/SOURCES/0210-mem-pool.-c-h-minor-changes.patch b/SOURCES/0210-mem-pool.-c-h-minor-changes.patch new file mode 100644 index 0000000..c238579 --- /dev/null +++ b/SOURCES/0210-mem-pool.-c-h-minor-changes.patch @@ -0,0 +1,129 @@ +From 77a3cac0c8aed9e084296719926a534128c31dee Mon Sep 17 00:00:00 2001 +From: Yaniv Kaul <ykaul@redhat.com> +Date: Wed, 27 Feb 2019 15:48:42 +0200 +Subject: [PATCH 210/221] mem-pool.{c|h}: minor changes + +1. Removed some code that was not needed. It did not really do anything. +2. CALLOC -> MALLOC in one place. + +Compile-tested only! 
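The CALLOC to MALLOC switch is safe whenever every field of the new block is assigned before first use, making the zero-fill pure overhead. A hedged illustration of that reasoning (struct and function invented here, not taken from mem-pool.c):

#include <stdlib.h>

struct item {
    int a;
    int b;
};

static struct item *
item_new(int a, int b)
{
    /* calloc() would zero both members here ... */
    struct item *it = malloc(sizeof(*it));

    if (it == NULL)
        return NULL;

    /* ... only for each of them to be overwritten immediately. */
    it->a = a;
    it->b = b;

    return it;
}

int
main(void)
{
    struct item *it = item_new(1, 2);

    free(it);
    return 0;
}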
+ +Upstream patch: +> BUG: 1193929 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22274 +> Signed-off-by: Yaniv Kaul <ykaul@redhat.com> +> Change-Id: I4419161e1bb636158e32b5d33044b06f1eef2449 + +Change-Id: I4419161e1bb636158e32b5d33044b06f1eef2449 +Updates: bz#1722801 +Signed-off-by: Yaniv Kaul <ykaul@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174712 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/mem-pool.c | 37 ++++++++++++------------------------- + 1 file changed, 12 insertions(+), 25 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index ab78804..ca25ffc 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -643,7 +643,7 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + } + pool = &pools[power - POOL_SMALLEST]; + +- new = GF_CALLOC(sizeof(struct mem_pool), 1, gf_common_mt_mem_pool); ++ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool); + if (!new) + return NULL; + +@@ -671,15 +671,7 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + void * + mem_get0(struct mem_pool *mem_pool) + { +- void *ptr = NULL; +- +- if (!mem_pool) { +- gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +- "invalid argument"); +- return NULL; +- } +- +- ptr = mem_get(mem_pool); ++ void *ptr = mem_get(mem_pool); + if (ptr) { + #if defined(GF_DISABLE_MEMPOOL) + memset(ptr, 0, mem_pool->sizeof_type); +@@ -736,12 +728,14 @@ mem_get_pool_list(void) + } + + pooled_obj_hdr_t * +-mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool, +- gf_boolean_t *hit) ++mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool) + { + per_thread_pool_list_t *pool_list; + per_thread_pool_t *pt_pool; + pooled_obj_hdr_t *retval; ++#ifdef DEBUG ++ gf_boolean_t hit = _gf_true; ++#endif + + pool_list = mem_get_pool_list(); + if (!pool_list || pool_list->poison) { +@@ -755,10 +749,6 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool, + pt_pool = &pool_list->pools[pool->power_of_two - POOL_SMALLEST]; + } + +-#ifdef DEBUG +- *hit = _gf_true; +-#endif +- + (void)pthread_spin_lock(&pool_list->lock); + + retval = pt_pool->hot_list; +@@ -778,7 +768,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool, + retval = malloc((1 << pt_pool->parent->power_of_two) + + sizeof(pooled_obj_hdr_t)); + #ifdef DEBUG +- *hit = _gf_false; ++ hit = _gf_false; + #endif + } + } +@@ -788,7 +778,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool, + retval->pool = mem_pool; + retval->power_of_two = mem_pool->pool->power_of_two; + #ifdef DEBUG +- if (*hit == _gf_true) ++ if (hit == _gf_true) + GF_ATOMIC_INC(mem_pool->hit); + else + GF_ATOMIC_INC(mem_pool->miss); +@@ -807,19 +797,16 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool, + void * + mem_get(struct mem_pool *mem_pool) + { +-#if defined(GF_DISABLE_MEMPOOL) +- return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool); +-#else +- pooled_obj_hdr_t *retval; +- gf_boolean_t hit; +- + if (!mem_pool) { + gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, + "invalid argument"); + return NULL; + } + +- retval = mem_get_from_pool(mem_pool, NULL, &hit); ++#if defined(GF_DISABLE_MEMPOOL) ++ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool); ++#else ++ pooled_obj_hdr_t *retval = 
mem_get_from_pool(mem_pool, NULL); + if (!retval) { + return NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch b/SOURCES/0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch new file mode 100644 index 0000000..27326a9 --- /dev/null +++ b/SOURCES/0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch @@ -0,0 +1,41 @@ +From 4fa3c0be983c3f99c2785036ded5ef5ab390419b Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 6 May 2019 15:57:16 +0530 +Subject: [PATCH 211/221] libglusterfs: Fix compilation when --disable-mempool + is used + +Upstream patch: +> BUG: 1193929 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22665 +> Change-Id: I245c065b209bcce5db939b6a0a934ba6fd393b47 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +Updates: bz#1722801 +Change-Id: I245c065b209bcce5db939b6a0a934ba6fd393b47 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174713 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/mem-pool.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index ca25ffc..df167b6 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -616,6 +616,11 @@ void + mem_pools_fini(void) + { + } ++void ++mem_pool_thread_destructor(void) ++{ ++} ++ + #endif + + struct mem_pool * +-- +1.8.3.1 + diff --git a/SOURCES/0212-core-fix-memory-allocation-issues.patch b/SOURCES/0212-core-fix-memory-allocation-issues.patch new file mode 100644 index 0000000..18da11d --- /dev/null +++ b/SOURCES/0212-core-fix-memory-allocation-issues.patch @@ -0,0 +1,169 @@ +From 0bf728030e0ad7a49e6e1737ea06ae74da9279d3 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 21 Jun 2019 11:28:08 +0200 +Subject: [PATCH 212/221] core: fix memory allocation issues + +Two problems have been identified that caused that gluster's memory +usage were twice higher than required. + +1. An off by 1 error caused that all objects allocated from the memory + pools were taken from a pool bigger than required. Since each pool + corresponds to a size equal to a power of two, this was wasting half + of the available memory. + +2. The header information used for accounting on each memory object was + not taken into consideration when searching for a suitable memory + pool. It was added later when each individual block was allocated. + This made this space "invisible" to memory accounting. + +Credits: Thanks to Nithya Balachandran for identifying this problem and + testing this patch. 
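A worked check makes the waste concrete. For a 256-byte request, assuming a 32-byte pooled_obj_hdr_t on an LP64 build and POOL_SMALLEST == 7 (a 128-byte smallest pool, as in this tree), the old computation lands one power of two too high and still ignores the header (standalone; GCC/Clang for __builtin_clzl):

#include <stdio.h>

#define POOL_SMALLEST 7 /* smallest pool: 2^7 = 128 bytes */
#define HDR_SIZE 32UL   /* assumed sizeof(pooled_obj_hdr_t) on LP64 */

int
main(void)
{
    unsigned long sizeof_type = 256;

    /* old computation: header ignored, and the "- 1 ... + 1" pair
     * rounds one power of two too high */
    unsigned long old = sizeof_type | ((1UL << POOL_SMALLEST) - 1);
    unsigned int old_power = sizeof(old) * 8 - __builtin_clzl(old - 1) + 1;

    /* fixed computation: header counted up front, plain rounded log2 */
    unsigned long size = (sizeof_type + HDR_SIZE - 1UL) |
                         ((1UL << POOL_SMALLEST) - 1UL);
    unsigned int new_power = sizeof(size) * 8 - __builtin_clzl(size);

    printf("old: 2^%u = %lu, fixed: 2^%u = %lu\n", old_power,
           1UL << old_power, new_power, 1UL << new_power);
    /* prints: old: 2^10 = 1024, fixed: 2^9 = 512 */
    return 0;
}

On top of that, the old allocator added the header outside the accounted size (malloc((1 << power) + sizeof(pooled_obj_hdr_t))), which is the second, "invisible to memory accounting" problem that the reworked AVAILABLE_SIZE() addresses.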
+ +Upstream patch: +> BUG: 1722802 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22921 +> Change-Id: I90e27ad795fe51ca11c13080f62207451f6c138c +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Fixes: bz#1722801 +Change-Id: I90e27ad795fe51ca11c13080f62207451f6c138c +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174714 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/glusterfs/mem-pool.h | 5 ++- + libglusterfs/src/mem-pool.c | 57 +++++++++++++++++++---------------- + 2 files changed, 35 insertions(+), 27 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index c5a486b..be0a26d 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -231,7 +231,10 @@ typedef struct pooled_obj_hdr { + struct mem_pool *pool; + } pooled_obj_hdr_t; + +-#define AVAILABLE_SIZE(p2) (1 << (p2)) ++/* Each memory block inside a pool has a fixed size that is a power of two. ++ * However each object will have a header that will reduce the available ++ * space. */ ++#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t)) + + typedef struct per_thread_pool { + /* the pool that was used to request this allocation */ +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index df167b6..d88041d 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -627,6 +627,7 @@ struct mem_pool * + mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + unsigned long count, char *name) + { ++ unsigned long extra_size, size; + unsigned int power; + struct mem_pool *new = NULL; + struct mem_pool_shared *pool = NULL; +@@ -637,10 +638,25 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type, + return NULL; + } + +- /* We ensure sizeof_type > 1 and the next power of two will be, at least, +- * 2^POOL_SMALLEST */ +- sizeof_type |= (1 << POOL_SMALLEST) - 1; +- power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1; ++ /* This is the overhead we'll have because of memory accounting for each ++ * memory block. */ ++ extra_size = sizeof(pooled_obj_hdr_t); ++ ++ /* We need to compute the total space needed to hold the data type and ++ * the header. Given that the smallest block size we have in the pools ++ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST). ++ * However, since this value is only needed to compute its rounded ++ * logarithm in base 2, and this only depends on the highest bit set, ++ * we can simply do a bitwise or with the minimum size. We need to ++ * subtract 1 for correct handling of sizes that are exactly a power ++ * of 2. */ ++ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL); ++ ++ /* We compute the logarithm in base 2 rounded up of the resulting size. ++ * This value will identify which pool we need to use from the pools of ++ * powers of 2. This is equivalent to finding the position of the highest ++ * bit set. 
*/ ++ power = sizeof(size) * 8 - __builtin_clzl(size); + if (power > POOL_LARGEST) { + gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, + "invalid argument"); +@@ -732,8 +748,8 @@ mem_get_pool_list(void) + return pool_list; + } + +-pooled_obj_hdr_t * +-mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool) ++static pooled_obj_hdr_t * ++mem_get_from_pool(struct mem_pool *mem_pool) + { + per_thread_pool_list_t *pool_list; + per_thread_pool_t *pt_pool; +@@ -747,12 +763,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool) + return NULL; + } + +- if (mem_pool) { +- pt_pool = &pool_list +- ->pools[mem_pool->pool->power_of_two - POOL_SMALLEST]; +- } else { +- pt_pool = &pool_list->pools[pool->power_of_two - POOL_SMALLEST]; +- } ++ pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST]; + + (void)pthread_spin_lock(&pool_list->lock); + +@@ -770,8 +781,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool) + } else { + (void)pthread_spin_unlock(&pool_list->lock); + GF_ATOMIC_INC(pt_pool->parent->allocs_stdc); +- retval = malloc((1 << pt_pool->parent->power_of_two) + +- sizeof(pooled_obj_hdr_t)); ++ retval = malloc(1 << pt_pool->parent->power_of_two); + #ifdef DEBUG + hit = _gf_false; + #endif +@@ -779,19 +789,14 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool) + } + + if (retval != NULL) { +- if (mem_pool) { +- retval->pool = mem_pool; +- retval->power_of_two = mem_pool->pool->power_of_two; ++ retval->pool = mem_pool; ++ retval->power_of_two = mem_pool->pool->power_of_two; + #ifdef DEBUG +- if (hit == _gf_true) +- GF_ATOMIC_INC(mem_pool->hit); +- else +- GF_ATOMIC_INC(mem_pool->miss); ++ if (hit == _gf_true) ++ GF_ATOMIC_INC(mem_pool->hit); ++ else ++ GF_ATOMIC_INC(mem_pool->miss); + #endif +- } else { +- retval->power_of_two = pool->power_of_two; +- retval->pool = NULL; +- } + retval->magic = GF_MEM_HEADER_MAGIC; + retval->pool_list = pool_list; + } +@@ -811,7 +816,7 @@ mem_get(struct mem_pool *mem_pool) + #if defined(GF_DISABLE_MEMPOOL) + return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool); + #else +- pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool, NULL); ++ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool); + if (!retval) { + return NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0213-cluster-dht-Strip-out-dht-xattrs.patch b/SOURCES/0213-cluster-dht-Strip-out-dht-xattrs.patch new file mode 100644 index 0000000..225379b --- /dev/null +++ b/SOURCES/0213-cluster-dht-Strip-out-dht-xattrs.patch @@ -0,0 +1,42 @@ +From ff5f06d6ba5ac87094ae5df435d1cfb38802e7ca Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Tue, 18 Jun 2019 15:33:29 +0530 +Subject: [PATCH 213/221] cluster/dht: Strip out dht xattrs + +Some internal DHT xattrs were not being +removed when calling getxattr in pass-through mode. +This has been fixed. 
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/22889/ + +>Change-Id: If7e3dbc7b495db88a566bd560888e3e9c167defa +>fixes: bz#1721435 +>Signed-off-by: N Balachandran <nbalacha@redhat.com> + + +BUG: 1721357 +Change-Id: I29bce7ea78bb4fd3b493404282cb2c48ef0bf4ee +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174699 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index e1edb38..9a6ea5b 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -11216,6 +11216,8 @@ dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + conf = this->private; + dict_del(xattr, conf->xattr_name); ++ dict_del(xattr, conf->mds_xattr_key); ++ dict_del(xattr, conf->commithash_xattr_name); + + if (frame->root->pid >= 0) { + GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr); +-- +1.8.3.1 + diff --git a/SOURCES/0214-geo-rep-Upgrading-config-file-to-new-version.patch b/SOURCES/0214-geo-rep-Upgrading-config-file-to-new-version.patch new file mode 100644 index 0000000..711aa3b --- /dev/null +++ b/SOURCES/0214-geo-rep-Upgrading-config-file-to-new-version.patch @@ -0,0 +1,114 @@ +From 76921775b0a6760276060409882c0556f19d8d01 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Wed, 29 May 2019 16:49:01 +0530 +Subject: [PATCH 214/221] geo-rep: Upgrading config file to new version + +- configuration handling is enhanced with patch +https://review.gluster.org/#/c/glusterfs/+/18257/ +- hence, the old configurations are not applied when +Geo-rep session is created in the old version and upgraded. + +This patch solves the issue. It, +- checks if the config file is old. +- parses required values from old config file and stores in new + config file, which ensures that configurations are applied on + upgrade. +- stores old config file as backup. 
+- handles changes in options introduced in + https://review.gluster.org/#/c/glusterfs/+/18257/ + +>fixes: bz#1707731 +>Change-Id: Iad8da6c1e1ae8ecf7c84dfdf8ea3ac6966d8a2a0 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22894/ + +Bug: 1708064 +Change-Id: Iad8da6c1e1ae8ecf7c84dfdf8ea3ac6966d8a2a0 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174743 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/gsyncd.py | 5 ++++ + geo-replication/syncdaemon/gsyncdconfig.py | 41 ++++++++++++++++++++++++++++++ + 2 files changed, 46 insertions(+) + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index effe0ce..a4c6f32 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -253,6 +253,11 @@ def main(): + if args.subcmd == "slave": + override_from_args = True + ++ if args.subcmd == "monitor": ++ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"]) ++ if ret is not None: ++ gconf.config_upgrade(config_file, ret) ++ + # Load Config file + gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf", + config_file, +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index 23a1c57..7edc582 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -14,6 +14,7 @@ try: + except ImportError: + from configparser import ConfigParser, NoSectionError + import os ++import shutil + from string import Template + from datetime import datetime + +@@ -325,6 +326,46 @@ class Gconf(object): + + return False + ++def is_config_file_old(config_file, mastervol, slavevol): ++ cnf = ConfigParser() ++ cnf.read(config_file) ++ session_section = "peers %s %s" % (mastervol, slavevol) ++ try: ++ return dict(cnf.items(session_section)) ++ except NoSectionError: ++ return None ++ ++def config_upgrade(config_file, ret): ++ config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp") ++ ++ #copy old config file in a backup file ++ shutil.copyfile(config_file, config_file_backup) ++ ++ #write a new config file ++ config = ConfigParser() ++ config.add_section('vars') ++ ++ for key, value in ret.items(): ++ #handle option name changes ++ if key == "use_tarssh": ++ new_key = "sync-method" ++ if value == "true": ++ new_value = "tarssh" ++ else: ++ new_value = "rsync" ++ config.set('vars', new_key, new_value) ++ ++ if key == "timeout": ++ new_key = "slave-timeout" ++ config.set('vars', new_key, value) ++ ++ #for changes like: ignore_deletes to ignore-deletes ++ new_key = key.replace("_", "-") ++ config.set('vars', new_key, value) ++ ++ with open(config_file, 'w') as configfile: ++ config.write(configfile) ++ + + def validate_unixtime(value): + try: +-- +1.8.3.1 + diff --git a/SOURCES/0215-posix-modify-storage.reserve-option-to-take-size-and.patch b/SOURCES/0215-posix-modify-storage.reserve-option-to-take-size-and.patch new file mode 100644 index 0000000..3e4217b --- /dev/null +++ b/SOURCES/0215-posix-modify-storage.reserve-option-to-take-size-and.patch @@ -0,0 +1,319 @@ +From 0c485548b4126ed907dec9941209b1b1312d0b5d Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Wed, 19 Jun 2019 15:08:58 +0530 +Subject: [PATCH 215/221] posix: modify storage.reserve option to 
take size and + percent + +* reverting changes made in +https://review.gluster.org/#/c/glusterfs/+/21686/ + +* Now storage.reserve can take value in percent or bytes + +> fixes: bz#1651445 +> Change-Id: Id4826210ec27991c55b17d1fecd90356bff3e036 +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +> Cherry pick from commit 5cbc87d8b8f1287e81c38b793b8d13b057208c62 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22900/ + +BUG: 1573077 +Change-Id: Id4826210ec27991c55b17d1fecd90356bff3e036 +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174744 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/posix/bug-1651445.t | 29 +++++++++------------- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 33 ------------------------- + xlators/storage/posix/src/posix-common.c | 33 +++++++++++-------------- + xlators/storage/posix/src/posix-helpers.c | 26 +++++++++---------- + xlators/storage/posix/src/posix-inode-fd-ops.c | 15 ++++++----- + xlators/storage/posix/src/posix.h | 4 +-- + 6 files changed, 51 insertions(+), 89 deletions(-) + +diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t +index f6f1833..5248d47 100644 +--- a/tests/bugs/posix/bug-1651445.t ++++ b/tests/bugs/posix/bug-1651445.t +@@ -17,39 +17,34 @@ TEST $CLI volume start $V0 + + TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +-TEST $CLI volume set $V0 storage.reserve-size 10MB ++#Setting the size in bytes ++TEST $CLI volume set $V0 storage.reserve 40MB + +-#No effect as priority to reserve-size +-TEST $CLI volume set $V0 storage.reserve 20 ++#wait 5s to reset disk_space_full flag ++sleep 5 + + TEST dd if=/dev/zero of=$M0/a bs=100M count=1 +-sleep 5 ++TEST dd if=/dev/zero of=$M0/b bs=10M count=1 + +-#Below dd confirms posix is giving priority to reserve-size +-TEST dd if=/dev/zero of=$M0/b bs=40M count=1 ++# Wait 5s to update disk_space_full flag because thread check disk space ++# after every 5s + + sleep 5 ++# setup_lvm create lvm partition of 150M and 40M are reserve so after ++# consuming more than 110M next dd should fail + TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1 + + rm -rf $M0/* +-#Size will reserve from the previously set reserve option = 20% +-TEST $CLI volume set $V0 storage.reserve-size 0 + +-#Overwrite reserve option +-TEST $CLI volume set $V0 storage.reserve-size 40MB ++#Setting the size in percent and repeating the above steps ++TEST $CLI volume set $V0 storage.reserve 40 + +-#wait 5s to reset disk_space_full flag + sleep 5 + +-TEST dd if=/dev/zero of=$M0/a bs=100M count=1 ++TEST dd if=/dev/zero of=$M0/a bs=80M count=1 + TEST dd if=/dev/zero of=$M0/b bs=10M count=1 + +-# Wait 5s to update disk_space_full flag because thread check disk space +-# after every 5s +- + sleep 5 +-# setup_lvm create lvm partition of 150M and 40M are reserve so after +-# consuming more than 110M next dd should fail + TEST ! 
dd if=/dev/zero of=$M0/c bs=5M count=1 + + TEST $CLI volume stop $V0 +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 3a7ab83..7a83124 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1231,30 +1231,6 @@ out: + + return ret; + } +-static int +-validate_size(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, +- char **op_errstr) +-{ +- xlator_t *this = NULL; +- uint64_t size = 0; +- int ret = -1; +- +- this = THIS; +- GF_VALIDATE_OR_GOTO("glusterd", this, out); +- ret = gf_string2bytesize_uint64(value, &size); +- if (ret < 0) { +- gf_asprintf(op_errstr, +- "%s is not a valid size. %s " +- "expects a valid value in bytes", +- value, key); +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", +- *op_errstr); +- } +-out: +- gf_msg_debug("glusterd", 0, "Returning %d", ret); +- +- return ret; +-} + + /* dispatch table for VOLUME SET + * ----------------------------- +@@ -2854,15 +2830,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_13_0, + }, + { +- .key = "storage.reserve-size", +- .voltype = "storage/posix", +- .value = "0", +- .validate_fn = validate_size, +- .description = "If set, priority will be given to " +- "storage.reserve-size over storage.reserve", +- .op_version = GD_OP_VERSION_7_0, +- }, +- { + .option = "health-check-timeout", + .key = "storage.health-check-timeout", + .type = NO_DOC, +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index 0f70af5..bfe2cb0 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -345,12 +345,14 @@ posix_reconfigure(xlator_t *this, dict_t *options) + " fallback to <hostname>:<export>"); + } + +- GF_OPTION_RECONF("reserve-size", priv->disk_reserve_size, options, size, ++ GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); ++ /* option can be any one of percent or bytes */ ++ priv->disk_unit = 0; ++ if (priv->disk_reserve < 100.0) ++ priv->disk_unit = 'p'; + +- GF_OPTION_RECONF("reserve", priv->disk_reserve_percent, options, uint32, +- out); +- if (priv->disk_reserve_size || priv->disk_reserve_percent) { ++ if (priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, +@@ -975,11 +977,15 @@ posix_init(xlator_t *this) + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; +- GF_OPTION_INIT("reserve-size", _private->disk_reserve_size, size, out); + +- GF_OPTION_INIT("reserve", _private->disk_reserve_percent, uint32, out); ++ GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); ++ ++ /* option can be any one of percent or bytes */ ++ _private->disk_unit = 0; ++ if (_private->disk_reserve < 100.0) ++ _private->disk_unit = 'p'; + +- if (_private->disk_reserve_size || _private->disk_reserve_percent) { ++ if (_private->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, +@@ -1221,23 +1227,14 @@ struct volume_options posix_options[] = { + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"reserve"}, +- .type = GF_OPTION_TYPE_INT, ++ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, + .min = 0, + .default_value = "1", + .validate = GF_OPT_VALIDATE_MIN, +- 
.description = "Percentage of disk space to be reserved." ++ .description = "Percentage/Size of disk space to be reserved." + " Set to 0 to disable", + .op_version = {GD_OP_VERSION_3_13_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, +- {.key = {"reserve-size"}, +- .type = GF_OPTION_TYPE_SIZET, +- .min = 0, +- .default_value = "0", +- .validate = GF_OPT_VALIDATE_MIN, +- .description = "size in megabytes to be reserved for disk space." +- " Set to 0 to disable", +- .op_version = {GD_OP_VERSION_7_0}, +- .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 849db3d..07169b5 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2246,11 +2246,11 @@ posix_disk_space_check(xlator_t *this) + struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; +- uint64_t size = 0; +- int percent = 0; ++ double size = 0; ++ double percent = 0; + struct statvfs buf = {0}; +- uint64_t totsz = 0; +- uint64_t freesz = 0; ++ double totsz = 0; ++ double freesz = 0; + + GF_VALIDATE_OR_GOTO(this->name, this, out); + priv = this->private; +@@ -2258,14 +2258,6 @@ posix_disk_space_check(xlator_t *this) + + subvol_path = priv->base_path; + +- if (priv->disk_reserve_size) { +- size = priv->disk_reserve_size; +- } else { +- percent = priv->disk_reserve_percent; +- totsz = (buf.f_blocks * buf.f_bsize); +- size = ((totsz * percent) / 100); +- } +- + op_ret = sys_statvfs(subvol_path, &buf); + + if (op_ret == -1) { +@@ -2273,8 +2265,16 @@ posix_disk_space_check(xlator_t *this) + "statvfs failed on %s", subvol_path); + goto out; + } +- freesz = (buf.f_bfree * buf.f_bsize); + ++ if (priv->disk_unit == 'p') { ++ percent = priv->disk_reserve; ++ totsz = (buf.f_blocks * buf.f_bsize); ++ size = ((totsz * percent) / 100); ++ } else { ++ size = priv->disk_reserve; ++ } ++ ++ freesz = (buf.f_bfree * buf.f_bsize); + if (freesz <= size) { + priv->disk_space_full = 1; + } else { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index b92c411..fc847d6 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -720,7 +720,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + thread after every 5 sec sleep to working correctly storage.reserve + option behaviour + */ +- if (priv->disk_reserve_size || priv->disk_reserve_percent) ++ if (priv->disk_reserve) + posix_disk_space_check(this); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); +@@ -2306,7 +2306,7 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + }; + struct posix_private *priv = NULL; + int shared_by = 1; +- int percent = 0; ++ double percent = 0; + uint64_t reserved_blocks = 0; + + VALIDATE_OR_GOTO(frame, out); +@@ -2332,11 +2332,14 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + goto out; + } + +- if (priv->disk_reserve_size) { +- reserved_blocks = priv->disk_reserve_size / buf.f_bsize; ++ if (priv->disk_unit == 'p') { ++ percent = priv->disk_reserve; ++ reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5); + } else { +- percent = priv->disk_reserve_percent; +- reserved_blocks = (buf.f_blocks * percent) / 100; ++ if (buf.f_bsize) { ++ reserved_blocks = (priv->disk_reserve + 
buf.f_bsize - 1) / ++ buf.f_bsize; ++ } + } + + if (buf.f_bfree > reserved_blocks) { +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 4364b96..b0935a7 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -225,8 +225,8 @@ struct posix_private { + pthread_t health_check; + gf_boolean_t health_check_active; + +- uint32_t disk_reserve_percent; +- uint64_t disk_reserve_size; ++ double disk_reserve; ++ char disk_unit; + uint32_t disk_space_full; + pthread_t disk_space_check; + gf_boolean_t disk_space_check_active; +-- +1.8.3.1 + diff --git a/SOURCES/0216-Test-case-fixe-for-downstream-3.5.0.patch b/SOURCES/0216-Test-case-fixe-for-downstream-3.5.0.patch new file mode 100644 index 0000000..bc4ce60 --- /dev/null +++ b/SOURCES/0216-Test-case-fixe-for-downstream-3.5.0.patch @@ -0,0 +1,29 @@ +From b2204969bb0dba5de32685e1021fa44d0c406813 Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Tue, 25 Jun 2019 12:17:10 +0530 +Subject: [PATCH 216/221] Test case fixe for downstream 3.5.0 + +Mark bug-1319374-THIS-crash.t as bad. + +BUG: 1704562 +Change-Id: I6afeb9a74ab88af7b741454367005250cd4c0e0f +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174652 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tests/bugs/gfapi/bug-1319374-THIS-crash.t | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tests/bugs/gfapi/bug-1319374-THIS-crash.t b/tests/bugs/gfapi/bug-1319374-THIS-crash.t +index 8d3db42..429d71e 100755 +--- a/tests/bugs/gfapi/bug-1319374-THIS-crash.t ++++ b/tests/bugs/gfapi/bug-1319374-THIS-crash.t +@@ -25,3 +25,5 @@ TEST $(dirname $0)/bug-1319374 $H0 $V0 $logdir/bug-1319374.log + cleanup_tester $(dirname $0)/bug-1319374 + + cleanup; ++#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1723673 ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1723673 +-- +1.8.3.1 + diff --git a/SOURCES/0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch b/SOURCES/0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch new file mode 100644 index 0000000..055b9f2 --- /dev/null +++ b/SOURCES/0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch @@ -0,0 +1,75 @@ +From 71ff9b7c6356e521d98ee025554b63dd23db9836 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Thu, 13 Jun 2019 22:43:47 +0530 +Subject: [PATCH 217/221] uss: Fix tar issue with ctime and uss enabled + +Problem: +If ctime and uss enabled, tar still complains with 'file +changed as we read it' + +Cause: +To clear nfs cache (gluster-nfs), the ctime was incremented +in snap-view client on stat cbk. + +Fix: +The ctime should not be incremented manually. Since gluster-nfs +is planning to be deprecated, this code is being removed to +fix the issue. 
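The warning itself comes from a check that is easy to model: tar stats each file before archiving it and again after reading it, and treats any ctime movement in between as concurrent modification. A simplified standalone sketch of that check (not tar's actual source):

#include <stdio.h>
#include <sys/stat.h>

/* A server-side bump of ctime, even by one nanosecond as the removed
 * ia_ctime_nsec++ did, trips this comparison. */
static int
file_changed(const struct stat *before, const struct stat *after)
{
    return before->st_ctim.tv_sec != after->st_ctim.tv_sec ||
           before->st_ctim.tv_nsec != after->st_ctim.tv_nsec;
}

int
main(int argc, char *argv[])
{
    struct stat before;
    struct stat after;

    if (argc < 2 || stat(argv[1], &before) != 0)
        return 1;

    /* ... the file would be read here, as tar does ... */

    if (stat(argv[1], &after) != 0)
        return 1;

    if (file_changed(&before, &after))
        fprintf(stderr, "%s: file changed as we read it\n", argv[1]);

    return 0;
}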
+ +Backport of: + > Patch: https://review.gluster.org/22861 + > Change-Id: Iae7f100c20fce880a50b008ba716077350281404 + > fixes: bz#1720290 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: Iae7f100c20fce880a50b008ba716077350281404 +BUG: 1709301 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/173922 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + .../features/snapview-client/src/snapview-client.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c +index 5d7986c..9c789ae 100644 +--- a/xlators/features/snapview-client/src/snapview-client.c ++++ b/xlators/features/snapview-client/src/snapview-client.c +@@ -577,20 +577,24 @@ gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) + { +- /* Consider a testcase: ++ /* TODO: FIX ME ++ * Consider a testcase: + * #mount -t nfs host1:/vol1 /mnt + * #ls /mnt + * #ls /mnt/.snaps (As expected this fails) + * #gluster volume set vol1 features.uss enable +- * Now `ls /mnt/.snaps` should work, +- * but fails with No such file or directory. +- * This is because NFS client caches the list of files in +- * a directory. This cache is updated if there are any changes +- * in the directory attributes. To solve this problem change +- * a attribute 'ctime' when USS is enabled ++ * Now `ls /mnt/.snaps` should work, but fails with No such file or ++ * directory. This is because NFS client (gNFS) caches the list of files ++ * in a directory. This cache is updated if there are any changes in the ++ * directory attributes. So, one way to solve this problem is to change ++ * 'ctime' attribute when USS is enabled as below. ++ * ++ * if (op_ret == 0 && IA_ISDIR(buf->ia_type)) ++ * buf->ia_ctime_nsec++; ++ * ++ * But this is not the ideal solution as applications see the unexpected ++ * ctime change causing failures. + */ +- if (op_ret == 0 && IA_ISDIR(buf->ia_type)) +- buf->ia_ctime_nsec++; + + SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch b/SOURCES/0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch new file mode 100644 index 0000000..b7db655 --- /dev/null +++ b/SOURCES/0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch @@ -0,0 +1,88 @@ +From 8cc6d8af00303c445b94715c92fe9e3e01edb867 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Mon, 24 Jun 2019 15:49:04 +0530 +Subject: [PATCH 218/221] graph/shd: Use glusterfs_graph_deactivate to free the + xl rec + +We were using glusterfs_graph_fini to free the xl rec from +glusterfs_process_volfp as well as glusterfs_graph_cleanup. + +Instead we can use glusterfs_graph_deactivate, which does +fini as well as other common rec free. 
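Routing both teardown paths through one helper keeps the translator-specific fini() paired with the shared frees it relies on. A schematic of the shape this gives xlator cleanup (invented, trimmed-down stand-ins; the real work is in glusterfs_graph_deactivate() and xlator_fini_rec()):

#include <stdlib.h>

typedef struct xl {
    void (*fini)(struct xl *xl);
    void *local_pool;
    void *itable;
} xl_t;

static void
xl_teardown(xl_t *xl)
{
    if (xl->fini)
        xl->fini(xl); /* translator-specific cleanup first */

    /* the shared frees live in exactly one place, so neither the
     * graph-switch path nor the process-exit path can skip half of them */
    free(xl->local_pool);
    xl->local_pool = NULL;
    free(xl->itable);
    xl->itable = NULL;
}

int
main(void)
{
    xl_t xl = {NULL, malloc(16), malloc(16)};

    xl_teardown(&xl);
    return 0;
}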
+ +Backport of:https://review.gluster.org/#/c/glusterfs/+/22904/ + +>Change-Id: Ie4a5f2771e5254aa5ed9f00c3672a6d2cc8e4bc1 +>Updates: bz#1716695 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +Change-Id: I09d7124366bc690ceca9e8d0adee8a0dc8081091 +BUG: 1711939 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174814 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/graph.c | 2 +- + libglusterfs/src/xlator.c | 9 ++++++++- + xlators/features/shard/src/shard.c | 3 +++ + 3 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 27d9335..5b95fd6 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1394,7 +1394,7 @@ glusterfs_graph_cleanup(void *arg) + + pthread_mutex_lock(&ctx->cleanup_lock); + { +- glusterfs_graph_fini(graph); ++ glusterfs_graph_deactivate(graph); + glusterfs_graph_destroy(graph); + } + pthread_mutex_unlock(&ctx->cleanup_lock); +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 71e1ed4..d9d3441 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -659,6 +659,7 @@ xlator_fini_rec(xlator_t *xl) + trav = trav->next; + } + ++ xl->cleanup_starting = 1; + if (xl->init_succeeded) { + if (xl->fini) { + old_THIS = THIS; +@@ -666,8 +667,14 @@ xlator_fini_rec(xlator_t *xl) + + xl->fini(xl); + +- if (xl->local_pool) ++ if (xl->local_pool) { + mem_pool_destroy(xl->local_pool); ++ xl->local_pool = NULL; ++ } ++ if (xl->itable) { ++ inode_table_destroy(xl->itable); ++ xl->itable = NULL; ++ } + + THIS = old_THIS; + } else { +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b248767..31c7eec 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -6785,6 +6785,9 @@ fini(xlator_t *this) + + GF_VALIDATE_OR_GOTO("shard", this, out); + ++ /*Itable was not created by shard, hence setting to NULL.*/ ++ this->itable = NULL; ++ + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + +-- +1.8.3.1 + diff --git a/SOURCES/0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch b/SOURCES/0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch new file mode 100644 index 0000000..eefb890 --- /dev/null +++ b/SOURCES/0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch @@ -0,0 +1,35 @@ +From d9781ed4964d9e752fc880c8cd8afcbd2c561ebe Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Wed, 26 Jun 2019 15:58:33 +0530 +Subject: [PATCH 219/221] posix : add posix_set_ctime() in posix_ftruncate() + +Upstream references : +> release 6: https://review.gluster.org/#/c/glusterfs/+/22965/ +> mainline: https://review.gluster.org/#/c/glusterfs/+/22948/ + +Change-Id: I0cb5320fea71306e0283509ae47024f23874b53b +fixes: bz#1720163 +Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174837 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-inode-fd-ops.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index fc847d6..c949f68 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ 
-5059,6 +5059,8 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ goto out;
+ }
+
++ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);
++
+ op_ret = 0;
+
+ out:
+--
+1.8.3.1
+
diff --git a/SOURCES/0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch b/SOURCES/0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch
new file mode 100644
index 0000000..07f702b
--- /dev/null
+++ b/SOURCES/0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch
@@ -0,0 +1,190 @@
+From b963fa8bb71963127147d33bf609f439dd5bd107 Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Thu, 27 Jun 2019 19:17:29 +0530
+Subject: [PATCH 220/221] graph/shd: Use top down approach while cleaning
+ xlator
+
+We were cleaning xlators from bottom to top, which might
+lead to problems when upper xlators try to access
+an xlator object loaded below them.
+
+One such scenario is when an fd_unref happens as part of the
+fini call, which might end up calling releasedir on a lower
+xlator. This will lead to an invalid memory access.
+
+Backport of: https://review.gluster.org/#/c/glusterfs/+/22968/
+
+>Change-Id: I8a6cb619256fab0b0c01a2d564fc88287c4415a0
+>Updates: bz#1716695
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I22bbf99e9451183b3e0fe61b57b2440ab4163fe5
+BUG: 1711939
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/174882
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/graph.c | 10 +++++++++-
+ xlators/features/bit-rot/src/stub/bit-rot-stub.c | 1 +
+ xlators/features/changelog/src/changelog.c | 1 +
+ xlators/features/cloudsync/src/cloudsync.c | 4 +++-
+ xlators/features/index/src/index.c | 1 +
+ xlators/features/quiesce/src/quiesce.c | 1 +
+ xlators/features/read-only/src/worm.c | 1 +
+ xlators/features/sdfs/src/sdfs.c | 1 +
+ xlators/features/selinux/src/selinux.c | 2 ++
+ xlators/features/trash/src/trash.c | 1 +
+ 10 files changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
+index 5b95fd6..172dc61 100644
+--- a/libglusterfs/src/graph.c
++++ b/libglusterfs/src/graph.c
+@@ -1193,6 +1193,14 @@ glusterfs_graph_fini(glusterfs_graph_t *graph)
+ if (trav->init_succeeded) {
+ trav->cleanup_starting = 1;
+ trav->fini(trav);
++ if (trav->local_pool) {
++ mem_pool_destroy(trav->local_pool);
++ trav->local_pool = NULL;
++ }
++ if (trav->itable) {
++ inode_table_destroy(trav->itable);
++ trav->itable = NULL;
++ }
+ trav->init_succeeded = 0;
+ }
+ trav = trav->next;
+@@ -1394,7 +1402,7 @@ glusterfs_graph_cleanup(void *arg)
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+- glusterfs_graph_deactivate(graph);
++ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+index 3f48a4b..03446be 100644
+--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
+@@ -185,6 +185,7 @@ cleanup_lock:
+ pthread_mutex_destroy(&priv->lock);
+ free_mempool:
+ mem_pool_destroy(priv->local_pool);
++ priv->local_pool = NULL;
+ free_priv:
+ GF_FREE(priv);
+ this->private = NULL;
+diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
+index d9025f3..2862d1e 100644
+---
a/xlators/features/changelog/src/changelog.c ++++ b/xlators/features/changelog/src/changelog.c +@@ -2790,6 +2790,7 @@ cleanup_options: + changelog_freeup_options(this, priv); + cleanup_mempool: + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + cleanup_priv: + GF_FREE(priv); + error_return: +diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c +index 26e512c..0ad987e 100644 +--- a/xlators/features/cloudsync/src/cloudsync.c ++++ b/xlators/features/cloudsync/src/cloudsync.c +@@ -200,8 +200,10 @@ cs_init(xlator_t *this) + + out: + if (ret == -1) { +- if (this->local_pool) ++ if (this->local_pool) { + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; ++ } + + cs_cleanup_private(priv); + +diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c +index 2f2a6d0..4ece7ff 100644 +--- a/xlators/features/index/src/index.c ++++ b/xlators/features/index/src/index.c +@@ -2478,6 +2478,7 @@ out: + GF_FREE(priv); + this->private = NULL; + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + } + + if (attr_inited) +diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c +index bfd1116..06f58c9 100644 +--- a/xlators/features/quiesce/src/quiesce.c ++++ b/xlators/features/quiesce/src/quiesce.c +@@ -2536,6 +2536,7 @@ fini(xlator_t *this) + this->private = NULL; + + mem_pool_destroy(priv->local_pool); ++ priv->local_pool = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); + out: +diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c +index 24196f8..7d13180 100644 +--- a/xlators/features/read-only/src/worm.c ++++ b/xlators/features/read-only/src/worm.c +@@ -569,6 +569,7 @@ fini(xlator_t *this) + mem_put(priv); + this->private = NULL; + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + out: + return; + } +diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c +index f0247fd..164c632 100644 +--- a/xlators/features/sdfs/src/sdfs.c ++++ b/xlators/features/sdfs/src/sdfs.c +@@ -1429,6 +1429,7 @@ void + fini(xlator_t *this) + { + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + return; + } + +diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c +index 58b4c5d..ce5fc90 100644 +--- a/xlators/features/selinux/src/selinux.c ++++ b/xlators/features/selinux/src/selinux.c +@@ -256,6 +256,7 @@ out: + GF_FREE(priv); + } + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + } + return ret; + } +@@ -284,6 +285,7 @@ fini(xlator_t *this) + GF_FREE(priv); + + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + + return; + } +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index d668436..eb5007b 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2523,6 +2523,7 @@ out: + GF_FREE(priv); + } + mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + } + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch b/SOURCES/0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch new file mode 100644 index 0000000..74e3796 --- /dev/null +++ b/SOURCES/0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch @@ -0,0 +1,75 @@ +From ac216eae4775f7d95877b247937e2a4a4828b1b2 Mon Sep 17 00:00:00 2001 +From: Raghavendra G <rgowdapp@redhat.com> +Date: Tue, 4 Jun 2019 
19:22:45 +0530 +Subject: [PATCH 221/221] protocol/client: propagte GF_EVENT_CHILD_PING only + for connections to brick + +Two reasons: +* ping responses from glusterd may not be relevant for Halo + replication. Instead, it might be interested in only knowing whether + the brick itself is responsive. +* When a brick is killed, propagating GF_EVENT_CHILD_PING of ping + response from glusterd results in GF_EVENT_DISCONNECT spuriously + propagated to parent xlators. These DISCONNECT events are from the + connections client establishes with glusterd as part of its + reconnect logic. Without GF_EVENT_CHILD_PING, the last event + propagated to parent xlators would be the first DISCONNECT event + from brick and hence subsequent DISCONNECTS to glusterd are not + propagated as protocol/client prevents same event being propagated + to parent xlators consecutively. propagating GF_EVENT_CHILD_PING for + ping responses from glusterd would change the last_sent_event to + GF_EVENT_CHILD_PING and hence protocol/client cannot prevent + subsequent DISCONNECT events + +>Signed-off-by: Raghavendra G <rgowdapp@redhat.com> +>Fixes: bz#1716979 +>Change-Id: I50276680c52f05ca9e12149a3094923622d6eaef + +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22821/ + +BUG: 1703423 +Change-Id: I50276680c52f05ca9e12149a3094923622d6eaef +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174883 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/protocol/client/src/client.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index a372807..95e4be5 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -2276,6 +2276,12 @@ client_mark_fd_bad(xlator_t *this) + return 0; + } + ++static int ++is_connection_to_brick(struct rpc_clnt *rpc) ++{ ++ return (rpc->conn.config.remote_port != 0); ++} ++ + int + client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + void *data) +@@ -2297,10 +2303,12 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + + switch (event) { + case RPC_CLNT_PING: { +- ret = default_notify(this, GF_EVENT_CHILD_PING, data); +- if (ret) +- gf_log(this->name, GF_LOG_INFO, "CHILD_PING notify failed"); +- conf->last_sent_event = GF_EVENT_CHILD_PING; ++ if (is_connection_to_brick(rpc)) { ++ ret = default_notify(this, GF_EVENT_CHILD_PING, data); ++ if (ret) ++ gf_log(this->name, GF_LOG_INFO, "CHILD_PING notify failed"); ++ conf->last_sent_event = GF_EVENT_CHILD_PING; ++ } + break; + } + case RPC_CLNT_CONNECT: { +-- +1.8.3.1 + diff --git a/SOURCES/0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch b/SOURCES/0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch new file mode 100644 index 0000000..3a492cb --- /dev/null +++ b/SOURCES/0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch @@ -0,0 +1,109 @@ +From 5f304e003cc24ff7877ab51bdfded0dbf8ec581b Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Fri, 21 Jun 2019 09:04:19 +0530 +Subject: [PATCH 222/255] cluster/dht: Fixed a memleak in dht_rename_cbk + +Fixed a memleak in dht_rename_cbk when creating +a linkto file. 
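+
+In outline, the fix below replaces the leaked loc_copy() with a
+dedicated copied frame whose dht_local_t owns the loc, and destroys
+that frame in the callback (a simplified sketch of the hunks that
+follow, with error handling omitted):
+
+    /* dht_rename_cbk(): create the linkto on a dedicated frame */
+    link_frame = copy_frame(frame);
+    link_local = dht_local_init(link_frame, &local->loc2, NULL,
+                                GF_FOP_MAXVALUE);
+    link_local->main_frame = frame; /* rename resumes on the original */
+    dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
+                        src_cached, dst_hashed, &link_local->loc);
+
+    /* dht_rename_links_create_cbk(): resume the rename on main_frame
+     * and destroy the helper frame so nothing is leaked */
+    dht_rename_unlink(main_frame, this);
+    DHT_STACK_DESTROY(frame);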
+ +upstream: https://review.gluster.org/#/c/glusterfs/+/22912/ + +>Change-Id: I705adef3cb79e33806520fc2b15558e90e2c211c +>fixes: bz#1722698 +>Signed-off-by: N Balachandran <nbalacha@redhat.com> + +BUG:1722512 +Change-Id: I8450cac82a0e1611e698ffac476ea5516e614236 +Signed-off-by: N Balachandran <nbalacha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175181 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Susant Palai <spalai@redhat.com> +--- + xlators/cluster/dht/src/dht-rename.c | 44 +++++++++++++++++++++++++++--------- + 1 file changed, 33 insertions(+), 11 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c +index 893b451..5ba2373 100644 +--- a/xlators/cluster/dht/src/dht-rename.c ++++ b/xlators/cluster/dht/src/dht-rename.c +@@ -1009,9 +1009,11 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + { + xlator_t *prev = NULL; + dht_local_t *local = NULL; ++ call_frame_t *main_frame = NULL; + + prev = cookie; + local = frame->local; ++ main_frame = local->main_frame; + + /* TODO: Handle this case in lookup-optimize */ + if (op_ret == -1) { +@@ -1024,7 +1026,8 @@ dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_linkfile_attr_heal(frame, this); + } + +- dht_rename_unlink(frame, this); ++ dht_rename_unlink(main_frame, this); ++ DHT_STACK_DESTROY(frame); + return 0; + } + +@@ -1040,7 +1043,8 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + xlator_t *src_cached = NULL; + xlator_t *dst_hashed = NULL; + xlator_t *dst_cached = NULL; +- loc_t link_loc = {0}; ++ call_frame_t *link_frame = NULL; ++ dht_local_t *link_local = NULL; + + local = frame->local; + prev = cookie; +@@ -1110,18 +1114,36 @@ dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + /* Create the linkto file for the dst file */ + if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) { +- loc_copy(&link_loc, &local->loc2); +- if (link_loc.inode) +- inode_unref(link_loc.inode); +- link_loc.inode = inode_ref(local->loc.inode); +- gf_uuid_copy(local->gfid, local->loc.inode->gfid); +- gf_uuid_copy(link_loc.gfid, local->loc.inode->gfid); +- +- dht_linkfile_create(frame, dht_rename_links_create_cbk, this, +- src_cached, dst_hashed, &link_loc); ++ link_frame = copy_frame(frame); ++ if (!link_frame) { ++ goto unlink; ++ } ++ ++ /* fop value sent as maxvalue because it is not used ++ * anywhere in this case */ ++ link_local = dht_local_init(link_frame, &local->loc2, NULL, ++ GF_FOP_MAXVALUE); ++ if (!link_local) { ++ goto unlink; ++ } ++ ++ if (link_local->loc.inode) ++ inode_unref(link_local->loc.inode); ++ link_local->loc.inode = inode_ref(local->loc.inode); ++ link_local->main_frame = frame; ++ link_local->stbuf = local->stbuf; ++ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid); ++ ++ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this, ++ src_cached, dst_hashed, &link_local->loc); + return 0; + } + ++unlink: ++ ++ if (link_frame) { ++ DHT_STACK_DESTROY(link_frame); ++ } + dht_rename_unlink(frame, this); + return 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0223-change-get_real_filename-implementation-to-use-ENOAT.patch b/SOURCES/0223-change-get_real_filename-implementation-to-use-ENOAT.patch new file mode 100644 index 0000000..a533388 --- /dev/null +++ b/SOURCES/0223-change-get_real_filename-implementation-to-use-ENOAT.patch @@ -0,0 +1,123 @@ +From 36b0bd86321436a951f225fcf2e921390ed8dc33 Mon Sep 17 00:00:00 2001 +From: Michael 
Adam <obnox@samba.org>
+Date: Thu, 20 Jun 2019 13:09:37 +0200
+Subject: [PATCH 223/255] change get_real_filename implementation to use
+ ENOATTR instead of ENOENT
+
+get_real_filename is implemented as a virtual extended attribute to help
+Samba implement the case-insensitive but case-preserving SMB protocol
+more efficiently. It is implemented as a getxattr call on the parent directory
+with the virtual key of "get_real_filename:<entryname>", looking for a
+spelling with different case of the provided file/dir name (<entryname>)
+and returning this correct spelling as the result if the entry is found.
+Originally (05aaec645a6262d431486eb5ac7cd702646cfcfb), the
+implementation used the ENOENT errno to return the authoritative answer
+that <entryname> does not exist in any case folding.
+
+Now this implementation is actually a violation or misuse of the defined
+API for the getxattr call, which returns ENOENT for the case that the dir
+that the call is made against does not exist, and ENOATTR (or the synonym
+ENODATA) for the case that the xattr does not exist.
+
+This was not a problem until the gluster fuse-bridge was changed
+to map ENOENT to ESTALE in 59629f1da9dca670d5dcc6425f7f89b3e96b46bf,
+after which the getxattr call for get_real_filename returned an
+ESTALE instead of ENOENT, breaking the expectation in Samba.
+
+It is an independent problem that ESTALE should not leak out to user
+space but is intended to trigger retries between fuse and gluster.
+But nevertheless, the semantics seem to be incorrect here and should
+be changed.
+
+This patch changes the implementation of the get_real_filename virtual
+xattr to correctly return ENOATTR instead of ENOENT if the file/directory
+being looked up is not found.
+
+The Samba glusterfs_fuse vfs module, which takes advantage of
+get_real_filename over a fuse mount, will receive a corresponding change
+to map ENOATTR to ENOENT. Without this change, it will still work
+correctly, but the performance optimization for non-existing files is
+lost. On the other hand, this change removes the distinction
+between the old not-implemented case and the implemented case,
+so a Samba changed to treat ENOATTR like ENOENT will no longer work
+correctly against old servers that don't implement get_real_filename;
+i.e. existing files will be reported as non-existing.
+
+Backport of https://review.gluster.org/c/glusterfs/+/22925
+
+Change-Id: I971b427ab8410636d5d201157d9af70e0d075b67
+fixes: bz#1724089
+Signed-off-by: Michael Adam <obnox@samba.org>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175012
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c | 8 ++++----
+ xlators/storage/posix/src/posix-inode-fd-ops.c | 4 ++--
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 9a6ea5b..219b072 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -4618,7 +4618,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+
+ LOCK(&frame->lock);
+ {
+- if (local->op_errno == ENODATA || local->op_errno == EOPNOTSUPP) {
++ if (local->op_errno == EOPNOTSUPP) {
+ /* Nothing to do here, we have already found
+ * a subvol which does not have the get_real_filename
+ * optimization. If condition is for simple logic.
+@@ -4627,7 +4627,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie, + } + + if (op_ret == -1) { +- if (op_errno == ENODATA || op_errno == EOPNOTSUPP) { ++ if (op_errno == EOPNOTSUPP) { + /* This subvol does not have the optimization. + * Better let the user know we don't support it. + * Remove previous results if any. +@@ -4655,7 +4655,7 @@ dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie, + goto post_unlock; + } + +- if (op_errno == ENOENT) { ++ if (op_errno == ENOATTR) { + /* Do nothing, our defaults are set to this. + */ + goto unlock; +@@ -4723,7 +4723,7 @@ dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, + cnt = local->call_cnt = layout->cnt; + + local->op_ret = -1; +- local->op_errno = ENOENT; ++ local->op_errno = ENOATTR; + + for (i = 0; i < cnt; i++) { + subvol = layout->list[i].xlator; +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index c949f68..ea3b69c 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -2954,7 +2954,7 @@ posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, + (void)sys_closedir(fd); + + if (!found) +- return -ENOENT; ++ return -ENOATTR; + + ret = dict_set_dynstr(dict, (char *)key, found); + if (ret) { +@@ -3422,7 +3422,7 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + if (ret < 0) { + op_ret = -1; + op_errno = -ret; +- if (op_errno == ENOENT) { ++ if (op_errno == ENOATTR) { + gf_msg_debug(this->name, 0, + "Failed to get " + "real filename (%s, %s)", +-- +1.8.3.1 + diff --git a/SOURCES/0224-core-replace-inet_addr-with-inet_pton.patch b/SOURCES/0224-core-replace-inet_addr-with-inet_pton.patch new file mode 100644 index 0000000..f9a3b56 --- /dev/null +++ b/SOURCES/0224-core-replace-inet_addr-with-inet_pton.patch @@ -0,0 +1,53 @@ +From 3528c4fb59ca4d3efda2cf0689b7549e449bb91b Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Fri, 14 Jun 2019 07:53:06 +0000 +Subject: [PATCH 224/255] core: replace inet_addr with inet_pton + +Fixes warning raised by RPMDiff on the use of inet_addr, which may +impact Ipv6 support + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22866/ + +>fixes: bz#1721385 +>Change-Id: Id2d9afa1747efa64bc79d90dd2566bff54deedeb +>Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> + +BUG: 1698435 +Change-Id: Id2d9afa1747efa64bc79d90dd2566bff54deedeb +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175318 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/events.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 9b3a226..2509767 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -41,6 +41,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + char *host = NULL; + struct addrinfo hints; + struct addrinfo *result = NULL; ++ xlator_t *this = THIS; + + /* Global context */ + ctx = THIS->ctx; +@@ -82,7 +83,12 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ /* Socket Configurations */ + server.sin_family = AF_INET; + server.sin_port = htons(EVENT_PORT); +- server.sin_addr.s_addr = inet_addr(host); ++ ret = inet_pton(server.sin_family, host, &server.sin_addr); ++ if (ret <= 0) { ++ gf_msg(this->name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "inet_pton failed with return code %d", ret); ++ goto out; ++ } + memset(&server.sin_zero, '\0', sizeof(server.sin_zero)); + + va_start(arguments, fmt); +-- +1.8.3.1 + diff --git a/SOURCES/0225-tests-utils-Fix-py2-py3-util-python-scripts.patch b/SOURCES/0225-tests-utils-Fix-py2-py3-util-python-scripts.patch new file mode 100644 index 0000000..5ad185d --- /dev/null +++ b/SOURCES/0225-tests-utils-Fix-py2-py3-util-python-scripts.patch @@ -0,0 +1,448 @@ +From 9d10b1fd102dc2d5bfa71891ded52b7a8f5e08d8 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Thu, 6 Jun 2019 12:54:04 +0530 +Subject: [PATCH 225/255] tests/utils: Fix py2/py3 util python scripts + +Following files are fixed. + +tests/bugs/distribute/overlap.py +tests/utils/changelogparser.py +tests/utils/create-files.py +tests/utils/gfid-access.py +tests/utils/libcxattr.py + +> upstream patch link : https://review.gluster.org/#/c/glusterfs/+/22829/ + +>Change-Id: I3db857cc19e19163d368d913eaec1269fbc37140 +>updates: bz#1193929 +>Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: I3db857cc19e19163d368d913eaec1269fbc37140 +BUG: 1704562 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175483 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/bugs/distribute/overlap.py | 2 +- + tests/bugs/glusterfs/bug-902610.t | 2 +- + tests/utils/changelogparser.py | 5 +- + tests/utils/create-files.py | 9 +- + tests/utils/gfid-access.py | 62 +++++++++---- + tests/utils/libcxattr.py | 22 +++-- + tests/utils/py2py3.py | 186 ++++++++++++++++++++++++++++++++++++++ + 7 files changed, 258 insertions(+), 30 deletions(-) + create mode 100644 tests/utils/py2py3.py + +diff --git a/tests/bugs/distribute/overlap.py b/tests/bugs/distribute/overlap.py +index 0941d37..2813979 100755 +--- a/tests/bugs/distribute/overlap.py ++++ b/tests/bugs/distribute/overlap.py +@@ -17,7 +17,7 @@ def calculate_one (ov, nv): + + def calculate_all (values): + total = 0 +- nv_index = len(values) / 2 ++ nv_index = len(values) // 2 + for old_val in values[:nv_index]: + new_val = values[nv_index] + nv_index += 1 +diff --git a/tests/bugs/glusterfs/bug-902610.t b/tests/bugs/glusterfs/bug-902610.t +index b45e92b..112c947 100755 +--- a/tests/bugs/glusterfs/bug-902610.t ++++ b/tests/bugs/glusterfs/bug-902610.t +@@ -28,7 +28,7 @@ function get_layout() + fi + + # Figure out where the join point is. +- target=$( $PYTHON -c "print '%08x' % (0x$layout1_e + 1)") ++ target=$( $PYTHON -c "print('%08x' % (0x$layout1_e + 1))") + #echo "target for layout2 = $target" > /dev/tty + + # The second layout should cover everything that the first doesn't. 
+diff --git a/tests/utils/changelogparser.py b/tests/utils/changelogparser.py +index e8e252d..3b8f81d 100644 +--- a/tests/utils/changelogparser.py ++++ b/tests/utils/changelogparser.py +@@ -125,7 +125,10 @@ class Record(object): + return repr(self.__dict__) + + def __str__(self): +- return unicode(self).encode('utf-8') ++ if sys.version_info >= (3,): ++ return self.__unicode__() ++ else: ++ return unicode(self).encode('utf-8') + + + def get_num_tokens(data, tokens, version=Version.V11): +diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py +index b2a1961..04736e9 100755 +--- a/tests/utils/create-files.py ++++ b/tests/utils/create-files.py +@@ -19,6 +19,11 @@ import argparse + datsiz = 0 + timr = 0 + ++def get_ascii_upper_alpha_digits(): ++ if sys.version_info > (3,0): ++ return string.ascii_uppercase+string.digits ++ else: ++ return string.uppercase+string.digits + + def setLogger(filename): + global logger +@@ -111,7 +116,7 @@ def create_tar_file(fil, size, mins, maxs, rand): + + def get_filename(flen): + size = flen +- char = string.uppercase+string.digits ++ char = get_ascii_upper_alpha_digits() + st = ''.join(random.choice(char) for i in range(size)) + ti = str((hex(int(str(time.time()).split('.')[0])))[2:]) + return ti+"%%"+st +@@ -175,7 +180,7 @@ def tar_files(files, file_count, inter, size, mins, maxs, + + + def setxattr_files(files, randname, dir_path): +- char = string.uppercase+string.digits ++ char = get_ascii_upper_alpha_digits() + if not randname: + for k in range(files): + v = ''.join(random.choice(char) for i in range(10)) +diff --git a/tests/utils/gfid-access.py b/tests/utils/gfid-access.py +index 556d2b4..c35c122 100755 +--- a/tests/utils/gfid-access.py ++++ b/tests/utils/gfid-access.py +@@ -33,23 +33,51 @@ def _fmt_mkdir(l): + def _fmt_symlink(l1, l2): + return "!II%dsI%ds%ds" % (37, l1+1, l2+1) + +-def entry_pack_reg(gf, bn, mo, uid, gid): +- blen = len(bn) +- return struct.pack(_fmt_mknod(blen), +- uid, gid, gf, mo, bn, +- stat.S_IMODE(mo), 0, umask()) +- +-def entry_pack_dir(gf, bn, mo, uid, gid): +- blen = len(bn) +- return struct.pack(_fmt_mkdir(blen), +- uid, gid, gf, mo, bn, +- stat.S_IMODE(mo), umask()) +- +-def entry_pack_symlink(gf, bn, lnk, mo, uid, gid): +- blen = len(bn) +- llen = len(lnk) +- return struct.pack(_fmt_symlink(blen, llen), +- uid, gid, gf, mo, bn, lnk) ++ ++if sys.version_info > (3,): ++ def entry_pack_reg(gf, bn, mo, uid, gid): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ return struct.pack(_fmt_mknod(blen), ++ uid, gid, gf.encode(), mo, bn_encoded, ++ stat.S_IMODE(mo), 0, umask()) ++ ++ # mkdir ++ def entry_pack_dir(gf, bn, mo, uid, gid): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ return struct.pack(_fmt_mkdir(blen), ++ uid, gid, gf.encode(), mo, bn_encoded, ++ stat.S_IMODE(mo), umask()) ++ # symlink ++ def entry_pack_symlink(gf, bn, lnk, st): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ lnk_encoded = lnk.encode() ++ llen = len(lnk_encoded) ++ return struct.pack(_fmt_symlink(blen, llen), ++ st['uid'], st['gid'], ++ gf.encode(), st['mode'], bn_encoded, ++ lnk_encoded) ++ ++else: ++ def entry_pack_reg(gf, bn, mo, uid, gid): ++ blen = len(bn) ++ return struct.pack(_fmt_mknod(blen), ++ uid, gid, gf, mo, bn, ++ stat.S_IMODE(mo), 0, umask()) ++ ++ def entry_pack_dir(gf, bn, mo, uid, gid): ++ blen = len(bn) ++ return struct.pack(_fmt_mkdir(blen), ++ uid, gid, gf, mo, bn, ++ stat.S_IMODE(mo), umask()) ++ ++ def entry_pack_symlink(gf, bn, lnk, mo, uid, gid): ++ blen = len(bn) ++ llen = len(lnk) ++ 
return struct.pack(_fmt_symlink(blen, llen), ++ uid, gid, gf, mo, bn, lnk) + + if __name__ == '__main__': + if len(sys.argv) < 9: +diff --git a/tests/utils/libcxattr.py b/tests/utils/libcxattr.py +index fd0b083..3f3ed1f 100644 +--- a/tests/utils/libcxattr.py ++++ b/tests/utils/libcxattr.py +@@ -10,7 +10,9 @@ + + import os + import sys +-from ctypes import CDLL, c_int, create_string_buffer ++from ctypes import CDLL, c_int ++from py2py3 import bytearray_to_str, gr_create_string_buffer ++from py2py3 import gr_query_xattr, gr_lsetxattr, gr_lremovexattr + + + class Xattr(object): +@@ -47,20 +49,23 @@ class Xattr(object): + @classmethod + def _query_xattr(cls, path, siz, syscall, *a): + if siz: +- buf = create_string_buffer('\0' * siz) ++ buf = gr_create_string_buffer(siz) + else: + buf = None + ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz))) + if ret == -1: + cls.raise_oserr() + if siz: +- return buf.raw[:ret] ++ # py2 and py3 compatibility. Convert bytes array ++ # to string ++ result = bytearray_to_str(buf.raw) ++ return result[:ret] + else: + return ret + + @classmethod + def lgetxattr(cls, path, attr, siz=0): +- return cls._query_xattr(path, siz, 'lgetxattr', attr) ++ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr) + + @classmethod + def lgetxattr_buf(cls, path, attr): +@@ -74,20 +79,21 @@ class Xattr(object): + + @classmethod + def llistxattr(cls, path, siz=0): +- ret = cls._query_xattr(path, siz, 'llistxattr') ++ ret = gr_query_xattr(cls, path, siz, 'llistxattr') + if isinstance(ret, str): +- ret = ret.split('\0') ++ ret = ret.strip('\0') ++ ret = ret.split('\0') if ret else [] + return ret + + @classmethod + def lsetxattr(cls, path, attr, val): +- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0) ++ ret = gr_lsetxattr(cls, path, attr, val) + if ret == -1: + cls.raise_oserr() + + @classmethod + def lremovexattr(cls, path, attr): +- ret = cls.libc.lremovexattr(path, attr) ++ ret = gr_lremovexattr(cls, path, attr) + if ret == -1: + cls.raise_oserr() + +diff --git a/tests/utils/py2py3.py b/tests/utils/py2py3.py +new file mode 100644 +index 0000000..63aca10 +--- /dev/null ++++ b/tests/utils/py2py3.py +@@ -0,0 +1,186 @@ ++# ++# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> ++# This file is part of GlusterFS. ++ ++# This file is licensed to you under your choice of the GNU Lesser ++# General Public License, version 3 or any later version (LGPLv3 or ++# later), or the GNU General Public License, version 2 (GPLv2), in all ++# cases as published by the Free Software Foundation. ++# ++ ++# All python2/python3 compatibility routines ++ ++import sys ++import os ++import stat ++import struct ++from ctypes import create_string_buffer ++ ++def umask(): ++ return os.umask(0) ++ ++if sys.version_info >= (3,): ++ def pipe(): ++ (r, w) = os.pipe() ++ os.set_inheritable(r, True) ++ os.set_inheritable(w, True) ++ return (r, w) ++ ++ # Raw conversion of bytearray to string. Used in the cases where ++ # buffer is created by create_string_buffer which is a 8-bit char ++ # array and passed to syscalls to fetch results. Using encode/decode ++ # doesn't work as it converts to string altering the size. ++ def bytearray_to_str(byte_arr): ++ return ''.join([chr(b) for b in byte_arr]) ++ ++ # Raw conversion of string to bytes. This is required to convert ++ # back the string into bytearray(c char array) to use in struc ++ # pack/unpacking. Again encode/decode can't be used as it ++ # converts it alters size. 
++ def str_to_bytearray(string): ++ return bytes([ord(c) for c in string]) ++ ++ def gr_create_string_buffer(size): ++ return create_string_buffer(b'\0', size) ++ ++ def gr_query_xattr(cls, path, size, syscall, attr=None): ++ if attr: ++ return cls._query_xattr(path.encode(), size, syscall, ++ attr.encode()) ++ else: ++ return cls._query_xattr(path.encode(), size, syscall) ++ ++ def gr_lsetxattr(cls, path, attr, val): ++ return cls.libc.lsetxattr(path.encode(), attr.encode(), val, ++ len(val), 0) ++ ++ def gr_lremovexattr(cls, path, attr): ++ return cls.libc.lremovexattr(path.encode(), attr.encode()) ++ ++ def gr_cl_register(cls, brick, path, log_file, log_level, retries): ++ return cls._get_api('gf_changelog_register')(brick.encode(), ++ path.encode(), ++ log_file.encode(), ++ log_level, retries) ++ ++ def gr_cl_done(cls, clfile): ++ return cls._get_api('gf_changelog_done')(clfile.encode()) ++ ++ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel, ++ actual_end): ++ return cls._get_api('gf_history_changelog')(changelog_path.encode(), ++ start, end, num_parallel, ++ actual_end) ++ ++ def gr_cl_history_done(cls, clfile): ++ return cls._get_api('gf_history_changelog_done')(clfile.encode()) ++ ++ # regular file ++ ++ def entry_pack_reg(cls, gf, bn, mo, uid, gid): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ return struct.pack(cls._fmt_mknod(blen), ++ uid, gid, gf.encode(), mo, bn_encoded, ++ stat.S_IMODE(mo), 0, umask()) ++ ++ def entry_pack_reg_stat(cls, gf, bn, st): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ mo = st['mode'] ++ return struct.pack(cls._fmt_mknod(blen), ++ st['uid'], st['gid'], ++ gf.encode(), mo, bn_encoded, ++ stat.S_IMODE(mo), 0, umask()) ++ # mkdir ++ ++ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ return struct.pack(cls._fmt_mkdir(blen), ++ uid, gid, gf.encode(), mo, bn_encoded, ++ stat.S_IMODE(mo), umask()) ++ # symlink ++ ++ def entry_pack_symlink(cls, gf, bn, lnk, st): ++ bn_encoded = bn.encode() ++ blen = len(bn_encoded) ++ lnk_encoded = lnk.encode() ++ llen = len(lnk_encoded) ++ return struct.pack(cls._fmt_symlink(blen, llen), ++ st['uid'], st['gid'], ++ gf.encode(), st['mode'], bn_encoded, ++ lnk_encoded) ++else: ++ def pipe(): ++ (r, w) = os.pipe() ++ return (r, w) ++ ++ # Raw conversion of bytearray to string ++ def bytearray_to_str(byte_arr): ++ return byte_arr ++ ++ # Raw conversion of string to bytearray ++ def str_to_bytearray(string): ++ return string ++ ++ def gr_create_string_buffer(size): ++ return create_string_buffer('\0', size) ++ ++ def gr_query_xattr(cls, path, size, syscall, attr=None): ++ if attr: ++ return cls._query_xattr(path, size, syscall, attr) ++ else: ++ return cls._query_xattr(path, size, syscall) ++ ++ def gr_lsetxattr(cls, path, attr, val): ++ return cls.libc.lsetxattr(path, attr, val, len(val), 0) ++ ++ def gr_lremovexattr(cls, path, attr): ++ return cls.libc.lremovexattr(path, attr) ++ ++ def gr_cl_register(cls, brick, path, log_file, log_level, retries): ++ return cls._get_api('gf_changelog_register')(brick, path, log_file, ++ log_level, retries) ++ ++ def gr_cl_done(cls, clfile): ++ return cls._get_api('gf_changelog_done')(clfile) ++ ++ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel, ++ actual_end): ++ return cls._get_api('gf_history_changelog')(changelog_path, start, end, ++ num_parallel, actual_end) ++ ++ def gr_cl_history_done(cls, clfile): ++ return 
cls._get_api('gf_history_changelog_done')(clfile) ++ ++ # regular file ++ ++ def entry_pack_reg(cls, gf, bn, mo, uid, gid): ++ blen = len(bn) ++ return struct.pack(cls._fmt_mknod(blen), ++ uid, gid, gf, mo, bn, ++ stat.S_IMODE(mo), 0, umask()) ++ ++ def entry_pack_reg_stat(cls, gf, bn, st): ++ blen = len(bn) ++ mo = st['mode'] ++ return struct.pack(cls._fmt_mknod(blen), ++ st['uid'], st['gid'], ++ gf, mo, bn, ++ stat.S_IMODE(mo), 0, umask()) ++ # mkdir ++ ++ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid): ++ blen = len(bn) ++ return struct.pack(cls._fmt_mkdir(blen), ++ uid, gid, gf, mo, bn, ++ stat.S_IMODE(mo), umask()) ++ # symlink ++ ++ def entry_pack_symlink(cls, gf, bn, lnk, st): ++ blen = len(bn) ++ llen = len(lnk) ++ return struct.pack(cls._fmt_symlink(blen, llen), ++ st['uid'], st['gid'], ++ gf, st['mode'], bn, lnk) +-- +1.8.3.1 + diff --git a/SOURCES/0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch b/SOURCES/0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch new file mode 100644 index 0000000..f8f382a --- /dev/null +++ b/SOURCES/0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch @@ -0,0 +1,92 @@ +From 1c55f3633f748629cd0484f79b6c49101eb2df82 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Mon, 8 Jul 2019 11:47:28 +0530 +Subject: [PATCH 226/255] geo-rep : fix gluster command path for non-root + session + +Problem: +gluster command not found. + +Cause: +In Volinfo class we issue command 'gluster vol info' to get information +about volume like getting brick_root to perform various operation. +When geo-rep session is configured for non-root user Volinfo class +fails to issue gluster command due to unavailability of gluster +binary path for non-root user. + +Solution: +Use config value 'slave-gluster-command-dir'/'gluster-command-dir' to get path +for gluster command based on caller. + +>Backport of: +>Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/22920/. 
+>fixes: bz#1722740
+>Change-Id: I4ec46373da01f5d00ecd160c4e8c6239da8b3859
+>Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1712591
+Change-Id: Ifea2927253a9521fa459fea6de8a60085c3413f6
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175485
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/monitor.py | 4 ++--
+ geo-replication/syncdaemon/syncdutils.py | 12 +++++++++---
+ 2 files changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
+index c45ef24..234f3f1 100644
+--- a/geo-replication/syncdaemon/monitor.py
++++ b/geo-replication/syncdaemon/monitor.py
+@@ -369,7 +369,7 @@ def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ mvol = VolinfoFromGconf(master.volume, master=True)
+ else:
+- mvol = Volinfo(master.volume, master.host)
++ mvol = Volinfo(master.volume, master.host, master=True)
+ logging.debug('master bricks: ' + repr(mvol.bricks))
+ prelude = []
+ slave_host = None
+@@ -385,7 +385,7 @@ def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ svol = VolinfoFromGconf(slave.volume, master=False)
+ else:
+- svol = Volinfo(slave.volume, "localhost", prelude)
++ svol = Volinfo(slave.volume, "localhost", prelude, master=False)
+
+ sbricks = svol.bricks
+ suuid = svol.uuid
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index 3f41b5f..2ee10ac 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -672,7 +672,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid):
+ dir_path = ENOENT
+
+ if not slv_bricks:
+- slv_info = Volinfo(slv_volume, slv_host)
++ slv_info = Volinfo(slv_volume, slv_host, master=False)
+ slv_bricks = slv_info.bricks
+ # Result of readlink would be of format as below.
+ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename"
+@@ -854,8 +854,14 @@ class Popen(subprocess.Popen):
+
+ class Volinfo(object):
+
+- def __init__(self, vol, host='localhost', prelude=[]):
+- po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host,
++ def __init__(self, vol, host='localhost', prelude=[], master=True):
++ if master:
++ gluster_cmd_dir = gconf.get("gluster-command-dir")
++ else:
++ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
++
++ gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster')
++ po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host,
+ 'volume', 'info', vol],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+ vix = po.stdout.read()
+--
+1.8.3.1
+
diff --git a/SOURCES/0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch b/SOURCES/0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch
new file mode 100644
index 0000000..41d482f
--- /dev/null
+++ b/SOURCES/0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch
@@ -0,0 +1,914 @@
+From b0815b8a84a07d17a1215c55afc38888ee9fc37c Mon Sep 17 00:00:00 2001
+From: Mohammed Rafi KC <rkavunga@redhat.com>
+Date: Mon, 24 Jun 2019 12:00:20 +0530
+Subject: [PATCH 227/255] glusterd/svc: update pid of mux volumes from the shd
+ process
+
+For a normal volume, we update the pid from the process itself,
+either while daemonizing or at the end of init when running in
+no-daemon mode. Along with updating the pid we also lock the
+file, to make sure that the process is running fine.
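+
+Concretely, that per-process pidfile update amounts to the following
+(condensed from the helpers this patch adds below; error handling and
+the old-pid compare logic omitted):
+
+    pidfp = fopen(pid_file, "a+");
+    lockf(fileno(pidfp), F_TLOCK, 0); /* lock held while the process lives */
+    sys_ftruncate(fileno(pidfp), 0);
+    fprintf(pidfp, "%d\n", getpid());
+    fflush(pidfp);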
+
+With brick mux, we were updating the pidfile from glusterd
+after an attach/detach request.
+
+There are two problems with this approach.
+1) We are not holding a pidlock on any file other than the parent
+ process's pidfile.
+2) There is a chance of race conditions with attach/detach.
+ For example, an shd start and a volume stop could race. Let's say
+ we are starting an shd and it is attached to a volume.
+ While we are trying to link the pid file to the running process,
+ it could already have been deleted by the thread doing the volume stop.
+
+> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22935/
+
+>Change-Id: I29a00352102877ce09ea3f376ca52affceb5cf1a
+>Updates: bz#1722541
+>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+
+Change-Id: I29a00352102877ce09ea3f376ca52affceb5cf1a
+BUG: 1721802
+Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/175723
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ glusterfsd/src/gf_attach.c | 2 +
+ glusterfsd/src/glusterfsd-mgmt.c | 66 +++++++--
+ libglusterfs/src/glusterfs/glusterfs.h | 2 +-
+ libglusterfs/src/glusterfs/libglusterfs-messages.h | 3 +-
+ libglusterfs/src/graph.c | 154 ++++++++++++++++++++-
+ rpc/xdr/src/glusterd1-xdr.x | 1 +
+ xlators/mgmt/glusterd/src/glusterd-handler.c | 2 +
+ xlators/mgmt/glusterd/src/glusterd-handshake.c | 42 +++++-
+ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 +
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 25 ++++
+ .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 3 +
+ xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 8 +-
+ xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 57 ++++----
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 +
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 6 +-
+ 15 files changed, 325 insertions(+), 52 deletions(-)
+
+diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
+index 6293b9b..1bff854 100644
+--- a/glusterfsd/src/gf_attach.c
++++ b/glusterfsd/src/gf_attach.c
+@@ -65,6 +65,8 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
++ brick_req.dict.dict_val = NULL;
++ brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
+index 1d2cd1a..f930e0a 100644
+--- a/glusterfsd/src/glusterfsd-mgmt.c
++++ b/glusterfsd/src/glusterfsd-mgmt.c
+@@ -50,13 +50,16 @@ int
+ emancipate(glusterfs_ctx_t *ctx, int ret);
+ int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum);
++ char *volfile_id, char *checksum,
++ dict_t *dict);
+ int
+ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+- gf_volfile_t *volfile_obj, char *checksum);
++ gf_volfile_t *volfile_obj, char *checksum,
++ dict_t *dict);
+ int
+ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+- char *volfile_id, char *checksum);
++ char *volfile_id, char *checksum,
++ dict_t *dict);
+ int
+ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
+
+@@ -75,7 +78,8 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+ }
+
+ int
+-mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id)
++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
++ dict_t *dict) + { + glusterfs_ctx_t *ctx = NULL; + int ret = 0; +@@ -145,11 +149,11 @@ mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id) + * the volfile + */ + ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id, +- sha256_hash); ++ sha256_hash, dict); + goto unlock; + } + ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj, +- sha256_hash); ++ sha256_hash, dict); + if (ret < 0) { + gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!"); + } +@@ -387,6 +391,8 @@ err: + UNLOCK(&ctx->volfile_lock); + if (xlator_req.input.input_val) + free(xlator_req.input.input_val); ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + free(xlator_req.name); + xlator_req.name = NULL; + return 0; +@@ -561,6 +567,8 @@ out: + + free(xlator_req.name); + free(xlator_req.input.input_val); ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + if (dict) +@@ -982,6 +990,8 @@ out: + if (input) + dict_unref(input); + free(xlator_req.input.input_val); /*malloced by xdr*/ ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + free(xlator_req.name); +@@ -1062,6 +1072,8 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + out: + UNLOCK(&ctx->volfile_lock); + } ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + free(xlator_req.input.input_val); + free(xlator_req.name); + +@@ -1077,6 +1089,7 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req) + }; + xlator_t *this = NULL; + glusterfs_ctx_t *ctx = NULL; ++ dict_t *dict = NULL; + + GF_ASSERT(req); + this = THIS; +@@ -1091,20 +1104,41 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req) + req->rpc_err = GARBAGE_ARGS; + goto out; + } ++ + gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, + "received attach " + "request for volfile-id=%s", + xlator_req.name); ++ ++ dict = dict_new(); ++ if (!dict) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len, ++ &dict); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, ++ "failed to unserialize xdata to dictionary"); ++ goto out; ++ } ++ dict->extra_stdfree = xlator_req.dict.dict_val; ++ + ret = 0; + + if (ctx->active) { + ret = mgmt_process_volfile(xlator_req.input.input_val, +- xlator_req.input.input_len, xlator_req.name); ++ xlator_req.input.input_len, xlator_req.name, ++ dict); + } else { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, + "got attach for %s but no active graph", xlator_req.name); + } + out: ++ if (dict) ++ dict_unref(dict); + if (xlator_req.input.input_val) + free(xlator_req.input.input_val); + if (xlator_req.name) +@@ -1241,6 +1275,8 @@ out: + GF_FREE(filepath); + if (xlator_req.input.input_val) + free(xlator_req.input.input_val); ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + + return ret; + } +@@ -1313,6 +1349,8 @@ out: + if (dict) + dict_unref(dict); + free(xlator_req.input.input_val); // malloced by xdr ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + free(xlator_req.name); // malloced by xdr +@@ -1461,6 +1499,8 @@ out: + if (output) + dict_unref(output); + free(brick_req.input.input_val); ++ if (brick_req.dict.dict_val) ++ free(brick_req.dict.dict_val); + free(brick_req.name); + GF_FREE(xname); + GF_FREE(msg); +@@ -1654,6 +1694,8 @@ out: + if (dict) + dict_unref(dict); + free(node_req.input.input_val); ++ if 
(node_req.dict.dict_val) ++ free(node_req.dict.dict_val); + GF_FREE(msg); + GF_FREE(rsp.output.output_val); + GF_FREE(node_name); +@@ -1757,6 +1799,8 @@ glusterfs_handle_nfs_profile(rpcsvc_request_t *req) + + out: + free(nfs_req.input.input_val); ++ if (nfs_req.dict.dict_val) ++ free(nfs_req.dict.dict_val); + if (dict) + dict_unref(dict); + if (output) +@@ -1835,6 +1879,8 @@ out: + if (dict) + dict_unref(dict); + free(xlator_req.input.input_val); // malloced by xdr ++ if (xlator_req.dict.dict_val) ++ free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + free(xlator_req.name); // malloced by xdr +@@ -1963,7 +2009,8 @@ out: + if (dict) + dict_unref(dict); + free(brick_req.input.input_val); +- ++ if (brick_req.dict.dict_val) ++ free(brick_req.dict.dict_val); + gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; + } +@@ -2213,7 +2260,8 @@ volfile: + size = rsp.op_ret; + volfile_id = frame->local; + if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { +- ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id); ++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id, ++ dict); + goto post_graph_mgmt; + } + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 9ec2365..b6327b8 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -744,7 +744,7 @@ typedef struct { + char vol_id[NAME_MAX + 1]; + struct list_head volfile_list; + glusterfs_graph_t *graph; +- ++ FILE *pidfp; + } gf_volfile_t; + + glusterfs_ctx_t * +diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h +index ea2aa60..7e0eebb 100644 +--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h ++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h +@@ -111,6 +111,7 @@ GLFS_MSGID( + LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, + LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE, + LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED, +- LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED); ++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED, ++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED); + + #endif /* !_LG_MESSAGES_H_ */ +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 172dc61..05f76bf 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1467,6 +1467,21 @@ out: + } + + int ++glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj) ++{ ++ if (!volfile_obj || !volfile_obj->pidfp) ++ return 0; ++ ++ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id); ++ ++ lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0); ++ fclose(volfile_obj->pidfp); ++ volfile_obj->pidfp = NULL; ++ ++ return 0; ++} ++ ++int + glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + { + xlator_t *last_xl = NULL; +@@ -1502,6 +1517,7 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + + list_del_init(&volfile_obj->volfile_list); + glusterfs_mux_xlator_unlink(parent_graph->top, xl); ++ glusterfs_svc_mux_pidfile_cleanup(volfile_obj); + parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); + parent_graph->xl_count -= graph->xl_count; + parent_graph->leaf_count -= graph->leaf_count; +@@ -1531,8 +1547,126 @@ out: + } + + int ++glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file) ++{ ++ int ret = -1; ++ FILE *pidfp = NULL; ++ ++ if (!pid_file || 
!volfile_obj) ++ goto out; ++ ++ if (volfile_obj->pidfp) { ++ ret = 0; ++ goto out; ++ } ++ pidfp = fopen(pid_file, "a+"); ++ if (!pidfp) { ++ goto out; ++ } ++ volfile_obj->pidfp = pidfp; ++ ++ ret = lockf(fileno(pidfp), F_TLOCK, 0); ++ if (ret) { ++ ret = 0; ++ goto out; ++ } ++out: ++ return ret; ++} ++ ++int ++glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj, ++ const char *pid_file, pid_t pid) ++{ ++ int ret = 0; ++ FILE *pidfp = NULL; ++ int old_pid; ++ ++ if (!volfile_obj->pidfp) { ++ ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file); ++ if (ret == -1) ++ goto out; ++ } ++ pidfp = volfile_obj->pidfp; ++ ret = fscanf(pidfp, "%d", &old_pid); ++ if (ret <= 0) { ++ goto update; ++ } ++ if (old_pid == pid) { ++ ret = 0; ++ goto out; ++ } else { ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, ++ "Old pid=%d found in pidfile %s. Cleaning the old pid and " ++ "Updating new pid=%d", ++ old_pid, pid_file, pid); ++ } ++update: ++ ret = sys_ftruncate(fileno(pidfp), 0); ++ if (ret) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, errno, ++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, ++ "pidfile %s truncation failed", pid_file); ++ goto out; ++ } ++ ++ ret = fprintf(pidfp, "%d\n", pid); ++ if (ret <= 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, errno, ++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed", ++ pid_file); ++ goto out; ++ } ++ ++ ret = fflush(pidfp); ++ if (ret) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, errno, ++ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed", ++ pid_file); ++ goto out; ++ } ++out: ++ return ret; ++} ++ ++int ++glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj) ++{ ++ char *file = NULL; ++ int ret = -1; ++ ++ GF_VALIDATE_OR_GOTO("graph", dict, out); ++ GF_VALIDATE_OR_GOTO("graph", volfile_obj, out); ++ ++ ret = dict_get_str(dict, "pidfile", &file); ++ if (ret < 0) { ++ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, ++ "Failed to get pidfile from dict for volfile_id=%s", ++ volfile_obj->vol_id); ++ } ++ ++ ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid()); ++ if (ret < 0) { ++ ret = -1; ++ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, ++ "Failed to update " ++ "the pidfile for volfile_id=%s", ++ volfile_obj->vol_id); ++ ++ goto out; ++ } ++ ++ if (ret == 1) ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, ++ "PID %d updated in pidfile=%s", getpid(), file); ++ ret = 0; ++out: ++ return ret; ++} ++int + glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum) ++ char *volfile_id, char *checksum, ++ dict_t *dict) + { + glusterfs_graph_t *graph = NULL; + glusterfs_graph_t *parent_graph = NULL; +@@ -1615,18 +1749,25 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, + ret = -1; + goto out; + } ++ volfile_obj->pidfp = NULL; ++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", ++ volfile_id); ++ ++ if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) { ++ ret = glusterfs_update_mux_pid(dict, volfile_obj); ++ if (ret == -1) { ++ goto out; ++ } ++ } + + graph->used = 1; + parent_graph->id++; + list_add(&graph->list, &ctx->graphs); + INIT_LIST_HEAD(&volfile_obj->volfile_list); + volfile_obj->graph = graph; +- snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", +- volfile_id); + memcpy(volfile_obj->volfile_checksum, checksum, + sizeof(volfile_obj->volfile_checksum)); + list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list); +- + 
gf_log_dump_graph(fp, graph); + graph = NULL; + +@@ -1654,7 +1795,8 @@ out: + + int + glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, +- gf_volfile_t *volfile_obj, char *checksum) ++ gf_volfile_t *volfile_obj, char *checksum, ++ dict_t *dict) + { + glusterfs_graph_t *oldvolfile_graph = NULL; + glusterfs_graph_t *newvolfile_graph = NULL; +@@ -1703,7 +1845,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + } + volfile_obj = NULL; + ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id, +- checksum); ++ checksum, dict); + goto out; + } + +diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x +index 9b36d34..02ebec2 100644 +--- a/rpc/xdr/src/glusterd1-xdr.x ++++ b/rpc/xdr/src/glusterd1-xdr.x +@@ -132,6 +132,7 @@ struct gd1_mgmt_brick_op_req { + string name<>; + int op; + opaque input<>; ++ opaque dict<>; + } ; + + struct gd1_mgmt_brick_op_rsp { +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index af8a8a4..cc1f1df 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -5423,6 +5423,8 @@ glusterd_print_client_details(FILE *fp, dict_t *dict, + + brick_req->op = GLUSTERD_BRICK_STATUS; + brick_req->name = ""; ++ brick_req->dict.dict_val = NULL; ++ brick_req->dict.dict_len = 0; + + ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"), + brickinfo->path); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index 1ba58c3..86dec82 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -203,7 +203,7 @@ out: + + size_t + build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str) ++ char *trusted_str, dict_t *dict) + { + struct stat stbuf = { + 0, +@@ -340,11 +340,19 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, + + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { +- gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, ++ "Couldn't find volinfo for volid=%s", volid_ptr); + goto out; + } + + glusterd_svc_build_shd_volfile_path(volinfo, path, path_len); ++ ++ ret = glusterd_svc_set_shd_pidfile(volinfo, dict); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, ++ "Couldn't set pidfile in dict for volid=%s", volid_ptr); ++ goto out; ++ } + ret = 0; + goto out; + } +@@ -919,6 +927,7 @@ __server_getspec(rpcsvc_request_t *req) + char addrstr[RPCSVC_PEER_STRLEN] = {0}; + peer_info_t *peerinfo = NULL; + xlator_t *this = NULL; ++ dict_t *dict = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -971,6 +980,12 @@ __server_getspec(rpcsvc_request_t *req) + goto fail; + } + ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ + trans = req->trans; + /* addrstr will be empty for cli socket connections */ + ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr)); +@@ -989,12 +1004,26 @@ __server_getspec(rpcsvc_request_t *req) + */ + if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) { + ret = build_volfile_path(volume, filename, sizeof(filename), +- TRUSTED_PREFIX); ++ TRUSTED_PREFIX, dict); + } else { +- ret = build_volfile_path(volume, filename, sizeof(filename), NULL); ++ ret = build_volfile_path(volume, filename, sizeof(filename), NULL, ++ dict); + } + + if (ret == 0) { 
++ if (dict->count > 0) { ++ ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val, ++ &rsp.xdata.xdata_len); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SERL_LENGTH_GET_FAIL, ++ "Failed to serialize dict " ++ "to request buffer"); ++ goto fail; ++ } ++ dict->extra_free = rsp.xdata.xdata_val; ++ } ++ + /* to allocate the proper buffer to hold the file data */ + ret = sys_stat(filename, &stbuf); + if (ret < 0) { +@@ -1036,7 +1065,6 @@ __server_getspec(rpcsvc_request_t *req) + goto fail; + } + } +- + /* convert to XDR */ + fail: + if (spec_fd >= 0) +@@ -1056,6 +1084,10 @@ fail: + (xdrproc_t)xdr_gf_getspec_rsp); + free(args.key); // malloced by xdr + free(rsp.spec); ++ ++ if (dict) ++ dict_unref(dict); ++ + if (args.xdata.xdata_val) + free(args.xdata.xdata_val); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 9ea695e..454877b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -655,6 +655,8 @@ glusterd_brick_op_build_payload(glusterd_op_t op, + break; + } + ++ brick_req->dict.dict_len = 0; ++ brick_req->dict.dict_val = NULL; + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); + if (ret) +@@ -723,6 +725,8 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, + goto out; + } + ++ brick_req->dict.dict_len = 0; ++ brick_req->dict.dict_val = NULL; + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +index 57ceda9..5661e39 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +@@ -126,3 +126,28 @@ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) + out: + return; + } ++ ++int ++glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict) ++{ ++ int ret = -1; ++ glusterd_svc_t *svc = NULL; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, volinfo, out); ++ GF_VALIDATE_OR_GOTO(this->name, dict, out); ++ ++ svc = &(volinfo->shd.svc); ++ ++ ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, ++ "Failed to set pidfile %s in dict", svc->proc.pidfile); ++ goto out; ++ } ++ ret = 0; ++out: ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +index 59466ec..1f0984b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +@@ -36,4 +36,7 @@ glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, + int + glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo); + ++int ++glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 8ad90a9..590169f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -258,14 +258,20 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + gf_boolean_t shd_restart = _gf_false; + + conf = THIS->private; +- volinfo = data; + GF_VALIDATE_OR_GOTO("glusterd", 
conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); ++ volinfo = data; + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + if (volinfo) + glusterd_volinfo_ref(volinfo); + ++ if (volinfo->is_snap_volume) { ++ /* healing of a snap volume is not supported yet*/ ++ ret = 0; ++ goto out; ++ } ++ + while (conf->restart_shd) { + synclock_unlock(&conf->big_lock); + sleep(2); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 400826f..e106111 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -519,7 +519,7 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + /* Take first entry from the process */ + parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, + mux_svc); +- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); ++ glusterd_copy_file(parent_svc->proc.pidfile, svc->proc.pidfile); + mux_conn = &parent_svc->conn; + if (volinfo) + volinfo->shd.attached = _gf_true; +@@ -623,12 +623,9 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + glusterd_volinfo_t *volinfo = NULL; + glusterd_shdsvc_t *shd = NULL; + glusterd_svc_t *svc = frame->cookie; +- glusterd_svc_t *parent_svc = NULL; +- glusterd_svc_proc_t *mux_proc = NULL; + glusterd_conf_t *conf = NULL; + int *flag = (int *)frame->local; + xlator_t *this = THIS; +- int pid = -1; + int ret = -1; + gf_getspec_rsp rsp = { + 0, +@@ -679,27 +676,7 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + + if (rsp.op_ret == 0) { +- pthread_mutex_lock(&conf->attach_lock); +- { +- if (!strcmp(svc->name, "glustershd")) { +- mux_proc = svc->svc_proc; +- if (mux_proc && +- !gf_is_service_running(svc->proc.pidfile, &pid)) { +- /* +- * When svc's are restarting, there is a chance that the +- * attached svc might not have updated it's pid. Because +- * it was at connection stage. So in that case, we need +- * to retry the pid file copy. 
+- */ +- parent_svc = cds_list_entry(mux_proc->svcs.next, +- glusterd_svc_t, mux_svc); +- if (parent_svc) +- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); +- } +- } +- svc->online = _gf_true; +- } +- pthread_mutex_unlock(&conf->attach_lock); ++ svc->online = _gf_true; + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, + "svc %s of volume %s attached successfully to pid %d", svc->name, + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +@@ -726,7 +703,7 @@ out: + + extern size_t + build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str); ++ char *trusted_str, dict_t *dict); + + int + __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, +@@ -751,6 +728,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + ssize_t req_size = 0; + call_frame_t *frame = NULL; + gd1_mgmt_brick_op_req brick_req; ++ dict_t *dict = NULL; + void *req = &brick_req; + void *errlbl = &&err; + struct rpc_clnt_connection *conn; +@@ -776,6 +754,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + brick_req.name = volfile_id; + brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; ++ brick_req.dict.dict_val = NULL; ++ brick_req.dict.dict_len = 0; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { +@@ -783,7 +763,13 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + } + + if (op == GLUSTERD_SVC_ATTACH) { +- (void)build_volfile_path(volfile_id, path, sizeof(path), NULL); ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto *errlbl; ++ } ++ ++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict); + + ret = sys_stat(path, &stbuf); + if (ret < 0) { +@@ -818,6 +804,18 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + ret = -EIO; + goto *errlbl; + } ++ if (dict->count > 0) { ++ ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val, ++ &brick_req.dict.dict_len); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, ++ GD_MSG_DICT_SERL_LENGTH_GET_FAIL, ++ "Failed to serialize dict " ++ "to request buffer"); ++ goto *errlbl; ++ } ++ dict->extra_free = brick_req.dict.dict_val; ++ } + + frame->cookie = svc; + frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int); +@@ -862,6 +860,8 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + GF_ATOMIC_INC(conf->blockers); + ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); ++ if (dict) ++ dict_unref(dict); + GF_FREE(volfile_content); + if (spec_fd >= 0) + sys_close(spec_fd); +@@ -874,6 +874,9 @@ maybe_free_iobuf: + iobuf_unref(iobuf); + } + err: ++ if (dict) ++ dict_unref(dict); ++ + GF_FREE(volfile_content); + if (spec_fd >= 0) + sys_close(spec_fd); +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 618d8bc..a8098df 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -143,6 +143,8 @@ gd_brick_op_req_free(gd1_mgmt_brick_op_req *req) + if (!req) + return; + ++ if (req->dict.dict_val) ++ GF_FREE(req->dict.dict_val); + GF_FREE(req->input.input_val); + GF_FREE(req); + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 4c487d0..2eb5116 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5914,6 +5914,8 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, + 
brick_req.name = path; + brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; ++ brick_req.dict.dict_val = NULL; ++ brick_req.dict.dict_len = 0; + + req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); + iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); +@@ -5977,7 +5979,7 @@ err: + + extern size_t + build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str); ++ char *trusted_str, dict_t *dict); + + static int + attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, +@@ -6022,7 +6024,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, + goto out; + } + +- (void)build_volfile_path(full_id, path, sizeof(path), NULL); ++ (void)build_volfile_path(full_id, path, sizeof(path), NULL, NULL); + + for (tries = 15; tries > 0; --tries) { + rpc = rpc_clnt_ref(other_brick->rpc); +-- +1.8.3.1 + diff --git a/SOURCES/0228-locks-enable-notify-contention-by-default.patch b/SOURCES/0228-locks-enable-notify-contention-by-default.patch new file mode 100644 index 0000000..310cd8b --- /dev/null +++ b/SOURCES/0228-locks-enable-notify-contention-by-default.patch @@ -0,0 +1,39 @@ +From 21fe2ef700e76c8b7be40f21d3a4fb6b96eafaf0 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Thu, 6 Jun 2019 08:12:34 +0200 +Subject: [PATCH 228/255] locks: enable notify-contention by default + +This patch enables the lock contention notification by default. + +Upstream patch: +> Change-Id: I10131b026a7cb09fc7c93e1e6c8549988c1d7751 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22828 +> BUG: 1717754 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: I10131b026a7cb09fc7c93e1e6c8549988c1d7751 +Fixes: bz#1720488 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/174655 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index adb0df5..9db5ac6 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -4796,7 +4796,7 @@ struct volume_options options[] = { + "be used in conjunction w/ revocation-clear-all."}, + {.key = {"notify-contention"}, + .type = GF_OPTION_TYPE_BOOL, +- .default_value = "no", ++ .default_value = "yes", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .op_version = {GD_OP_VERSION_4_0_0}, + .tags = {"locks", "contention"}, +-- +1.8.3.1 + diff --git a/SOURCES/0229-glusterd-Show-the-correct-brick-status-in-get-state.patch b/SOURCES/0229-glusterd-Show-the-correct-brick-status-in-get-state.patch new file mode 100644 index 0000000..112c02e --- /dev/null +++ b/SOURCES/0229-glusterd-Show-the-correct-brick-status-in-get-state.patch @@ -0,0 +1,113 @@ +From 4fc0a77db5b9760fa5c00d3803c6d11a28a00b74 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Wed, 3 Jul 2019 15:22:38 +0530 +Subject: [PATCH 229/255] glusterd: Show the correct brick status in get-state + +Problem: get-state does not show correct brick status if brick + status is not Started, it always shows started if any value + is set brickinfo->status + +Solution: Check the value of brickinfo->status to show correct status + in get-state + +> Change-Id: I12a79619024c2cf59f338220d144f2f034059b3b +> fixes: bz#1726906 +> (Cherry pick from commit 
af989db23d1db00e087f2b9d3dfc43b13ef17153) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22983/) + +BUG: 1726991 +Change-Id: I12a79619024c2cf59f338220d144f2f034059b3b +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175355 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 7 +++++-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 28 ++++++++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 ++++ + 3 files changed, 37 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index cc1f1df..94e1be5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -5589,7 +5589,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) + char vol_status_str[STATUS_STRLEN] = { + 0, + }; +- ++ char brick_status_str[STATUS_STRLEN] = { ++ 0, ++ }; + this = THIS; + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + +@@ -5852,8 +5854,9 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) + brickinfo->rdma_port); + fprintf(fp, "Volume%d.Brick%d.port_registered: %d\n", count_bkp, + count, brickinfo->port_registered); ++ glusterd_brick_get_status_str(brickinfo, brick_status_str); + fprintf(fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count, +- brickinfo->status ? "Started" : "Stopped"); ++ brick_status_str); + + /*FIXME: This is a hacky way of figuring out whether a + * brick belongs to the hot or cold tier */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2eb5116..3bdfd49 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13898,6 +13898,34 @@ out: + return ret; + } + ++void ++glusterd_brick_get_status_str(glusterd_brickinfo_t *brickinfo, char *status_str) ++{ ++ GF_VALIDATE_OR_GOTO(THIS->name, brickinfo, out); ++ GF_VALIDATE_OR_GOTO(THIS->name, status_str, out); ++ ++ switch (brickinfo->status) { ++ case GF_BRICK_STOPPED: ++ sprintf(status_str, "%s", "Stopped"); ++ break; ++ case GF_BRICK_STARTED: ++ sprintf(status_str, "%s", "Started"); ++ break; ++ case GF_BRICK_STARTING: ++ sprintf(status_str, "%s", "Starting"); ++ break; ++ case GF_BRICK_STOPPING: ++ sprintf(status_str, "%s", "Stopping"); ++ break; ++ default: ++ sprintf(status_str, "%s", "None"); ++ break; ++ } ++ ++out: ++ return; ++} ++ + int + glusterd_volume_get_transport_type_str(glusterd_volinfo_t *volinfo, + char *transport_type_str) +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 6ad8062..5c6a453 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -781,6 +781,10 @@ glusterd_volume_get_type_str(glusterd_volinfo_t *volinfo, char **vol_type_str); + int + glusterd_volume_get_status_str(glusterd_volinfo_t *volinfo, char *status_str); + ++void ++glusterd_brick_get_status_str(glusterd_brickinfo_t *brickinfo, ++ char *status_str); ++ + int + glusterd_volume_get_transport_type_str(glusterd_volinfo_t *volinfo, + char *transport_type_str); +-- +1.8.3.1 + diff --git a/SOURCES/0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch b/SOURCES/0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch new file mode 
100644 index 0000000..a9847ed --- /dev/null +++ b/SOURCES/0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch @@ -0,0 +1,893 @@ +From 308fe0d81dbef9f84bb1ad8e7309e3ffc28d6394 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:37:29 +0530 +Subject: [PATCH 230/255] Revert "glusterd/svc: update pid of mux volumes from + the shd process" + +This reverts commit b0815b8a84a07d17a1215c55afc38888ee9fc37c. +Label : DOWNSTREAM ONLY + +BUG: 1471742 +Change-Id: Iab11c686565e9a9c852f2b7c2d236fa1a348f96a +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175940 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/gf_attach.c | 2 - + glusterfsd/src/glusterfsd-mgmt.c | 66 ++------- + libglusterfs/src/glusterfs/glusterfs.h | 2 +- + libglusterfs/src/glusterfs/libglusterfs-messages.h | 3 +- + libglusterfs/src/graph.c | 154 +-------------------- + rpc/xdr/src/glusterd1-xdr.x | 1 - + xlators/mgmt/glusterd/src/glusterd-handler.c | 2 - + xlators/mgmt/glusterd/src/glusterd-handshake.c | 42 +----- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 4 - + .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 25 ---- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 3 - + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 8 +- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 57 ++++---- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 2 - + xlators/mgmt/glusterd/src/glusterd-utils.c | 6 +- + 15 files changed, 52 insertions(+), 325 deletions(-) + +diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c +index 1bff854..6293b9b 100644 +--- a/glusterfsd/src/gf_attach.c ++++ b/glusterfsd/src/gf_attach.c +@@ -65,8 +65,6 @@ send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op) + brick_req.name = path; + brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; +- brick_req.dict.dict_val = NULL; +- brick_req.dict.dict_len = 0; + + req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); + iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index f930e0a..1d2cd1a 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -50,16 +50,13 @@ int + emancipate(glusterfs_ctx_t *ctx, int ret); + int + glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum, +- dict_t *dict); ++ char *volfile_id, char *checksum); + int + glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, +- gf_volfile_t *volfile_obj, char *checksum, +- dict_t *dict); ++ gf_volfile_t *volfile_obj, char *checksum); + int + glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum, +- dict_t *dict); ++ char *volfile_id, char *checksum); + int + glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj); + +@@ -78,8 +75,7 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + } + + int +-mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id, +- dict_t *dict) ++mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id) + { + glusterfs_ctx_t *ctx = NULL; + int ret = 0; +@@ -149,11 +145,11 @@ mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id, + * the volfile + */ + ret = glusterfs_process_svc_attach_volfp(ctx, 
tmpfp, volfile_id, +- sha256_hash, dict); ++ sha256_hash); + goto unlock; + } + ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj, +- sha256_hash, dict); ++ sha256_hash); + if (ret < 0) { + gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!"); + } +@@ -391,8 +387,6 @@ err: + UNLOCK(&ctx->volfile_lock); + if (xlator_req.input.input_val) + free(xlator_req.input.input_val); +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + free(xlator_req.name); + xlator_req.name = NULL; + return 0; +@@ -567,8 +561,6 @@ out: + + free(xlator_req.name); + free(xlator_req.input.input_val); +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + if (dict) +@@ -990,8 +982,6 @@ out: + if (input) + dict_unref(input); + free(xlator_req.input.input_val); /*malloced by xdr*/ +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + free(xlator_req.name); +@@ -1072,8 +1062,6 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + out: + UNLOCK(&ctx->volfile_lock); + } +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + free(xlator_req.input.input_val); + free(xlator_req.name); + +@@ -1089,7 +1077,6 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req) + }; + xlator_t *this = NULL; + glusterfs_ctx_t *ctx = NULL; +- dict_t *dict = NULL; + + GF_ASSERT(req); + this = THIS; +@@ -1104,41 +1091,20 @@ glusterfs_handle_svc_attach(rpcsvc_request_t *req) + req->rpc_err = GARBAGE_ARGS; + goto out; + } +- + gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, + "received attach " + "request for volfile-id=%s", + xlator_req.name); +- +- dict = dict_new(); +- if (!dict) { +- ret = -1; +- errno = ENOMEM; +- goto out; +- } +- +- ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len, +- &dict); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, +- "failed to unserialize xdata to dictionary"); +- goto out; +- } +- dict->extra_stdfree = xlator_req.dict.dict_val; +- + ret = 0; + + if (ctx->active) { + ret = mgmt_process_volfile(xlator_req.input.input_val, +- xlator_req.input.input_len, xlator_req.name, +- dict); ++ xlator_req.input.input_len, xlator_req.name); + } else { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, + "got attach for %s but no active graph", xlator_req.name); + } + out: +- if (dict) +- dict_unref(dict); + if (xlator_req.input.input_val) + free(xlator_req.input.input_val); + if (xlator_req.name) +@@ -1275,8 +1241,6 @@ out: + GF_FREE(filepath); + if (xlator_req.input.input_val) + free(xlator_req.input.input_val); +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + + return ret; + } +@@ -1349,8 +1313,6 @@ out: + if (dict) + dict_unref(dict); + free(xlator_req.input.input_val); // malloced by xdr +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + free(xlator_req.name); // malloced by xdr +@@ -1499,8 +1461,6 @@ out: + if (output) + dict_unref(output); + free(brick_req.input.input_val); +- if (brick_req.dict.dict_val) +- free(brick_req.dict.dict_val); + free(brick_req.name); + GF_FREE(xname); + GF_FREE(msg); +@@ -1694,8 +1654,6 @@ out: + if (dict) + dict_unref(dict); + free(node_req.input.input_val); +- if (node_req.dict.dict_val) +- free(node_req.dict.dict_val); + GF_FREE(msg); + GF_FREE(rsp.output.output_val); + GF_FREE(node_name); +@@ -1799,8 +1757,6 @@ glusterfs_handle_nfs_profile(rpcsvc_request_t *req) + + out: + 
free(nfs_req.input.input_val); +- if (nfs_req.dict.dict_val) +- free(nfs_req.dict.dict_val); + if (dict) + dict_unref(dict); + if (output) +@@ -1879,8 +1835,6 @@ out: + if (dict) + dict_unref(dict); + free(xlator_req.input.input_val); // malloced by xdr +- if (xlator_req.dict.dict_val) +- free(xlator_req.dict.dict_val); + if (output) + dict_unref(output); + free(xlator_req.name); // malloced by xdr +@@ -2009,8 +1963,7 @@ out: + if (dict) + dict_unref(dict); + free(brick_req.input.input_val); +- if (brick_req.dict.dict_val) +- free(brick_req.dict.dict_val); ++ + gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; + } +@@ -2260,8 +2213,7 @@ volfile: + size = rsp.op_ret; + volfile_id = frame->local; + if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { +- ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id, +- dict); ++ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id); + goto post_graph_mgmt; + } + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index b6327b8..9ec2365 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -744,7 +744,7 @@ typedef struct { + char vol_id[NAME_MAX + 1]; + struct list_head volfile_list; + glusterfs_graph_t *graph; +- FILE *pidfp; ++ + } gf_volfile_t; + + glusterfs_ctx_t * +diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h +index 7e0eebb..ea2aa60 100644 +--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h ++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h +@@ -111,7 +111,6 @@ GLFS_MSGID( + LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, + LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE, + LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED, +- LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED, +- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED); ++ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED); + + #endif /* !_LG_MESSAGES_H_ */ +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 05f76bf..172dc61 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1467,21 +1467,6 @@ out: + } + + int +-glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj) +-{ +- if (!volfile_obj || !volfile_obj->pidfp) +- return 0; +- +- gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id); +- +- lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0); +- fclose(volfile_obj->pidfp); +- volfile_obj->pidfp = NULL; +- +- return 0; +-} +- +-int + glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + { + xlator_t *last_xl = NULL; +@@ -1517,7 +1502,6 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + + list_del_init(&volfile_obj->volfile_list); + glusterfs_mux_xlator_unlink(parent_graph->top, xl); +- glusterfs_svc_mux_pidfile_cleanup(volfile_obj); + parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); + parent_graph->xl_count -= graph->xl_count; + parent_graph->leaf_count -= graph->leaf_count; +@@ -1547,126 +1531,8 @@ out: + } + + int +-glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file) +-{ +- int ret = -1; +- FILE *pidfp = NULL; +- +- if (!pid_file || !volfile_obj) +- goto out; +- +- if (volfile_obj->pidfp) { +- ret = 0; +- goto out; +- } +- pidfp = fopen(pid_file, "a+"); +- if (!pidfp) { +- goto out; +- } +- volfile_obj->pidfp = pidfp; +- +- ret = lockf(fileno(pidfp), 
F_TLOCK, 0); +- if (ret) { +- ret = 0; +- goto out; +- } +-out: +- return ret; +-} +- +-int +-glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj, +- const char *pid_file, pid_t pid) +-{ +- int ret = 0; +- FILE *pidfp = NULL; +- int old_pid; +- +- if (!volfile_obj->pidfp) { +- ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file); +- if (ret == -1) +- goto out; +- } +- pidfp = volfile_obj->pidfp; +- ret = fscanf(pidfp, "%d", &old_pid); +- if (ret <= 0) { +- goto update; +- } +- if (old_pid == pid) { +- ret = 0; +- goto out; +- } else { +- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, +- "Old pid=%d found in pidfile %s. Cleaning the old pid and " +- "Updating new pid=%d", +- old_pid, pid_file, pid); +- } +-update: +- ret = sys_ftruncate(fileno(pidfp), 0); +- if (ret) { +- gf_msg("glusterfsd", GF_LOG_ERROR, errno, +- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, +- "pidfile %s truncation failed", pid_file); +- goto out; +- } +- +- ret = fprintf(pidfp, "%d\n", pid); +- if (ret <= 0) { +- gf_msg("glusterfsd", GF_LOG_ERROR, errno, +- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed", +- pid_file); +- goto out; +- } +- +- ret = fflush(pidfp); +- if (ret) { +- gf_msg("glusterfsd", GF_LOG_ERROR, errno, +- LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed", +- pid_file); +- goto out; +- } +-out: +- return ret; +-} +- +-int +-glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj) +-{ +- char *file = NULL; +- int ret = -1; +- +- GF_VALIDATE_OR_GOTO("graph", dict, out); +- GF_VALIDATE_OR_GOTO("graph", volfile_obj, out); +- +- ret = dict_get_str(dict, "pidfile", &file); +- if (ret < 0) { +- gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, +- "Failed to get pidfile from dict for volfile_id=%s", +- volfile_obj->vol_id); +- } +- +- ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid()); +- if (ret < 0) { +- ret = -1; +- gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, +- "Failed to update " +- "the pidfile for volfile_id=%s", +- volfile_obj->vol_id); +- +- goto out; +- } +- +- if (ret == 1) +- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, +- "PID %d updated in pidfile=%s", getpid(), file); +- ret = 0; +-out: +- return ret; +-} +-int + glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum, +- dict_t *dict) ++ char *volfile_id, char *checksum) + { + glusterfs_graph_t *graph = NULL; + glusterfs_graph_t *parent_graph = NULL; +@@ -1749,25 +1615,18 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, + ret = -1; + goto out; + } +- volfile_obj->pidfp = NULL; +- snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", +- volfile_id); +- +- if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) { +- ret = glusterfs_update_mux_pid(dict, volfile_obj); +- if (ret == -1) { +- goto out; +- } +- } + + graph->used = 1; + parent_graph->id++; + list_add(&graph->list, &ctx->graphs); + INIT_LIST_HEAD(&volfile_obj->volfile_list); + volfile_obj->graph = graph; ++ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", ++ volfile_id); + memcpy(volfile_obj->volfile_checksum, checksum, + sizeof(volfile_obj->volfile_checksum)); + list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list); ++ + gf_log_dump_graph(fp, graph); + graph = NULL; + +@@ -1795,8 +1654,7 @@ out: + + int + glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, +- gf_volfile_t *volfile_obj, char *checksum, +- dict_t 
*dict) ++ gf_volfile_t *volfile_obj, char *checksum) + { + glusterfs_graph_t *oldvolfile_graph = NULL; + glusterfs_graph_t *newvolfile_graph = NULL; +@@ -1845,7 +1703,7 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + } + volfile_obj = NULL; + ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id, +- checksum, dict); ++ checksum); + goto out; + } + +diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x +index 02ebec2..9b36d34 100644 +--- a/rpc/xdr/src/glusterd1-xdr.x ++++ b/rpc/xdr/src/glusterd1-xdr.x +@@ -132,7 +132,6 @@ struct gd1_mgmt_brick_op_req { + string name<>; + int op; + opaque input<>; +- opaque dict<>; + } ; + + struct gd1_mgmt_brick_op_rsp { +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 94e1be5..ac788a0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -5423,8 +5423,6 @@ glusterd_print_client_details(FILE *fp, dict_t *dict, + + brick_req->op = GLUSTERD_BRICK_STATUS; + brick_req->name = ""; +- brick_req->dict.dict_val = NULL; +- brick_req->dict.dict_len = 0; + + ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"), + brickinfo->path); +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index 86dec82..1ba58c3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -203,7 +203,7 @@ out: + + size_t + build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str, dict_t *dict) ++ char *trusted_str) + { + struct stat stbuf = { + 0, +@@ -340,19 +340,11 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, + + ret = glusterd_volinfo_find(volid_ptr, &volinfo); + if (ret == -1) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Couldn't find volinfo for volid=%s", volid_ptr); ++ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); + goto out; + } + + glusterd_svc_build_shd_volfile_path(volinfo, path, path_len); +- +- ret = glusterd_svc_set_shd_pidfile(volinfo, dict); +- if (ret == -1) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, +- "Couldn't set pidfile in dict for volid=%s", volid_ptr); +- goto out; +- } + ret = 0; + goto out; + } +@@ -927,7 +919,6 @@ __server_getspec(rpcsvc_request_t *req) + char addrstr[RPCSVC_PEER_STRLEN] = {0}; + peer_info_t *peerinfo = NULL; + xlator_t *this = NULL; +- dict_t *dict = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -980,12 +971,6 @@ __server_getspec(rpcsvc_request_t *req) + goto fail; + } + +- dict = dict_new(); +- if (!dict) { +- ret = -ENOMEM; +- goto fail; +- } +- + trans = req->trans; + /* addrstr will be empty for cli socket connections */ + ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr)); +@@ -1004,26 +989,12 @@ __server_getspec(rpcsvc_request_t *req) + */ + if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) { + ret = build_volfile_path(volume, filename, sizeof(filename), +- TRUSTED_PREFIX, dict); ++ TRUSTED_PREFIX); + } else { +- ret = build_volfile_path(volume, filename, sizeof(filename), NULL, +- dict); ++ ret = build_volfile_path(volume, filename, sizeof(filename), NULL); + } + + if (ret == 0) { +- if (dict->count > 0) { +- ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val, +- &rsp.xdata.xdata_len); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, +- GD_MSG_DICT_SERL_LENGTH_GET_FAIL, +- "Failed to 
serialize dict " +- "to request buffer"); +- goto fail; +- } +- dict->extra_free = rsp.xdata.xdata_val; +- } +- + /* to allocate the proper buffer to hold the file data */ + ret = sys_stat(filename, &stbuf); + if (ret < 0) { +@@ -1065,6 +1036,7 @@ __server_getspec(rpcsvc_request_t *req) + goto fail; + } + } ++ + /* convert to XDR */ + fail: + if (spec_fd >= 0) +@@ -1084,10 +1056,6 @@ fail: + (xdrproc_t)xdr_gf_getspec_rsp); + free(args.key); // malloced by xdr + free(rsp.spec); +- +- if (dict) +- dict_unref(dict); +- + if (args.xdata.xdata_val) + free(args.xdata.xdata_val); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 454877b..9ea695e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -655,8 +655,6 @@ glusterd_brick_op_build_payload(glusterd_op_t op, + break; + } + +- brick_req->dict.dict_len = 0; +- brick_req->dict.dict_val = NULL; + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); + if (ret) +@@ -725,8 +723,6 @@ glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req, + goto out; + } + +- brick_req->dict.dict_len = 0; +- brick_req->dict.dict_val = NULL; + ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val, + &brick_req->input.input_len); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +index 5661e39..57ceda9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +@@ -126,28 +126,3 @@ glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) + out: + return; + } +- +-int +-glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict) +-{ +- int ret = -1; +- glusterd_svc_t *svc = NULL; +- xlator_t *this = NULL; +- +- this = THIS; +- GF_VALIDATE_OR_GOTO("glusterd", this, out); +- GF_VALIDATE_OR_GOTO(this->name, volinfo, out); +- GF_VALIDATE_OR_GOTO(this->name, dict, out); +- +- svc = &(volinfo->shd.svc); +- +- ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, +- "Failed to set pidfile %s in dict", svc->proc.pidfile); +- goto out; +- } +- ret = 0; +-out: +- return ret; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +index 1f0984b..59466ec 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +@@ -36,7 +36,4 @@ glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, + int + glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo); + +-int +-glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict); +- + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 590169f..8ad90a9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -258,20 +258,14 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + gf_boolean_t shd_restart = _gf_false; + + conf = THIS->private; ++ volinfo = data; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- volinfo = data; + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + if (volinfo) + glusterd_volinfo_ref(volinfo); + +- if (volinfo->is_snap_volume) { +- /* 
healing of a snap volume is not supported yet*/ +- ret = 0; +- goto out; +- } +- + while (conf->restart_shd) { + synclock_unlock(&conf->big_lock); + sleep(2); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index e106111..400826f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -519,7 +519,7 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + /* Take first entry from the process */ + parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, + mux_svc); +- glusterd_copy_file(parent_svc->proc.pidfile, svc->proc.pidfile); ++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); + mux_conn = &parent_svc->conn; + if (volinfo) + volinfo->shd.attached = _gf_true; +@@ -623,9 +623,12 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + glusterd_volinfo_t *volinfo = NULL; + glusterd_shdsvc_t *shd = NULL; + glusterd_svc_t *svc = frame->cookie; ++ glusterd_svc_t *parent_svc = NULL; ++ glusterd_svc_proc_t *mux_proc = NULL; + glusterd_conf_t *conf = NULL; + int *flag = (int *)frame->local; + xlator_t *this = THIS; ++ int pid = -1; + int ret = -1; + gf_getspec_rsp rsp = { + 0, +@@ -676,7 +679,27 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + + if (rsp.op_ret == 0) { +- svc->online = _gf_true; ++ pthread_mutex_lock(&conf->attach_lock); ++ { ++ if (!strcmp(svc->name, "glustershd")) { ++ mux_proc = svc->svc_proc; ++ if (mux_proc && ++ !gf_is_service_running(svc->proc.pidfile, &pid)) { ++ /* ++ * When svc's are restarting, there is a chance that the ++ * attached svc might not have updated it's pid. Because ++ * it was at connection stage. So in that case, we need ++ * to retry the pid file copy. 
++ */ ++ parent_svc = cds_list_entry(mux_proc->svcs.next, ++ glusterd_svc_t, mux_svc); ++ if (parent_svc) ++ sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); ++ } ++ } ++ svc->online = _gf_true; ++ } ++ pthread_mutex_unlock(&conf->attach_lock); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, + "svc %s of volume %s attached successfully to pid %d", svc->name, + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +@@ -703,7 +726,7 @@ out: + + extern size_t + build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str, dict_t *dict); ++ char *trusted_str); + + int + __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, +@@ -728,7 +751,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + ssize_t req_size = 0; + call_frame_t *frame = NULL; + gd1_mgmt_brick_op_req brick_req; +- dict_t *dict = NULL; + void *req = &brick_req; + void *errlbl = &&err; + struct rpc_clnt_connection *conn; +@@ -754,8 +776,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + brick_req.name = volfile_id; + brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; +- brick_req.dict.dict_val = NULL; +- brick_req.dict.dict_len = 0; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { +@@ -763,13 +783,7 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + } + + if (op == GLUSTERD_SVC_ATTACH) { +- dict = dict_new(); +- if (!dict) { +- ret = -ENOMEM; +- goto *errlbl; +- } +- +- (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict); ++ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL); + + ret = sys_stat(path, &stbuf); + if (ret < 0) { +@@ -804,18 +818,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + ret = -EIO; + goto *errlbl; + } +- if (dict->count > 0) { +- ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val, +- &brick_req.dict.dict_len); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, +- GD_MSG_DICT_SERL_LENGTH_GET_FAIL, +- "Failed to serialize dict " +- "to request buffer"); +- goto *errlbl; +- } +- dict->extra_free = brick_req.dict.dict_val; +- } + + frame->cookie = svc; + frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int); +@@ -860,8 +862,6 @@ __glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, + GF_ATOMIC_INC(conf->blockers); + ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, + iobref, frame, NULL, 0, NULL, 0, NULL); +- if (dict) +- dict_unref(dict); + GF_FREE(volfile_content); + if (spec_fd >= 0) + sys_close(spec_fd); +@@ -874,9 +874,6 @@ maybe_free_iobuf: + iobuf_unref(iobuf); + } + err: +- if (dict) +- dict_unref(dict); +- + GF_FREE(volfile_content); + if (spec_fd >= 0) + sys_close(spec_fd); +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index a8098df..618d8bc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -143,8 +143,6 @@ gd_brick_op_req_free(gd1_mgmt_brick_op_req *req) + if (!req) + return; + +- if (req->dict.dict_val) +- GF_FREE(req->dict.dict_val); + GF_FREE(req->input.input_val); + GF_FREE(req); + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 3bdfd49..4525ec7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5914,8 +5914,6 @@ send_attach_req(xlator_t *this, struct rpc_clnt *rpc, char *path, + brick_req.name = path; + 
brick_req.input.input_val = NULL; + brick_req.input.input_len = 0; +- brick_req.dict.dict_val = NULL; +- brick_req.dict.dict_len = 0; + + req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); + iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); +@@ -5979,7 +5977,7 @@ err: + + extern size_t + build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str, dict_t *dict); ++ char *trusted_str); + + static int + attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, +@@ -6024,7 +6022,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, + goto out; + } + +- (void)build_volfile_path(full_id, path, sizeof(path), NULL, NULL); ++ (void)build_volfile_path(full_id, path, sizeof(path), NULL); + + for (tries = 15; tries > 0; --tries) { + rpc = rpc_clnt_ref(other_brick->rpc); +-- +1.8.3.1 + diff --git a/SOURCES/0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch b/SOURCES/0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch new file mode 100644 index 0000000..3b794c9 --- /dev/null +++ b/SOURCES/0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch @@ -0,0 +1,180 @@ +From 21f376939f03f91214218c485e7d3a2848dae4b2 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:43:44 +0530 +Subject: [PATCH 231/255] Revert "graph/shd: Use top down approach while + cleaning xlator" + +This reverts commit b963fa8bb71963127147d33bf609f439dd5bd107. + +Label : DOWNSTREAM ONLY + +BUG: 1471742 +Change-Id: Ifb8056395c5988cf7c484891bea052f5415bf9da +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175941 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/graph.c | 10 +--------- + xlators/features/bit-rot/src/stub/bit-rot-stub.c | 1 - + xlators/features/changelog/src/changelog.c | 1 - + xlators/features/cloudsync/src/cloudsync.c | 4 +--- + xlators/features/index/src/index.c | 1 - + xlators/features/quiesce/src/quiesce.c | 1 - + xlators/features/read-only/src/worm.c | 1 - + xlators/features/sdfs/src/sdfs.c | 1 - + xlators/features/selinux/src/selinux.c | 2 -- + xlators/features/trash/src/trash.c | 1 - + 10 files changed, 2 insertions(+), 21 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 172dc61..5b95fd6 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1193,14 +1193,6 @@ glusterfs_graph_fini(glusterfs_graph_t *graph) + if (trav->init_succeeded) { + trav->cleanup_starting = 1; + trav->fini(trav); +- if (trav->local_pool) { +- mem_pool_destroy(trav->local_pool); +- trav->local_pool = NULL; +- } +- if (trav->itable) { +- inode_table_destroy(trav->itable); +- trav->itable = NULL; +- } + trav->init_succeeded = 0; + } + trav = trav->next; +@@ -1402,7 +1394,7 @@ glusterfs_graph_cleanup(void *arg) + + pthread_mutex_lock(&ctx->cleanup_lock); + { +- glusterfs_graph_fini(graph); ++ glusterfs_graph_deactivate(graph); + glusterfs_graph_destroy(graph); + } + pthread_mutex_unlock(&ctx->cleanup_lock); +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +index 03446be..3f48a4b 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +@@ -185,7 +185,6 @@ cleanup_lock: + pthread_mutex_destroy(&priv->lock); + free_mempool: + mem_pool_destroy(priv->local_pool); +- priv->local_pool 
= NULL; + free_priv: + GF_FREE(priv); + this->private = NULL; +diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c +index 2862d1e..d9025f3 100644 +--- a/xlators/features/changelog/src/changelog.c ++++ b/xlators/features/changelog/src/changelog.c +@@ -2790,7 +2790,6 @@ cleanup_options: + changelog_freeup_options(this, priv); + cleanup_mempool: + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + cleanup_priv: + GF_FREE(priv); + error_return: +diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c +index 0ad987e..26e512c 100644 +--- a/xlators/features/cloudsync/src/cloudsync.c ++++ b/xlators/features/cloudsync/src/cloudsync.c +@@ -200,10 +200,8 @@ cs_init(xlator_t *this) + + out: + if (ret == -1) { +- if (this->local_pool) { ++ if (this->local_pool) + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; +- } + + cs_cleanup_private(priv); + +diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c +index 4ece7ff..2f2a6d0 100644 +--- a/xlators/features/index/src/index.c ++++ b/xlators/features/index/src/index.c +@@ -2478,7 +2478,6 @@ out: + GF_FREE(priv); + this->private = NULL; + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + } + + if (attr_inited) +diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c +index 06f58c9..bfd1116 100644 +--- a/xlators/features/quiesce/src/quiesce.c ++++ b/xlators/features/quiesce/src/quiesce.c +@@ -2536,7 +2536,6 @@ fini(xlator_t *this) + this->private = NULL; + + mem_pool_destroy(priv->local_pool); +- priv->local_pool = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); + out: +diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c +index 7d13180..24196f8 100644 +--- a/xlators/features/read-only/src/worm.c ++++ b/xlators/features/read-only/src/worm.c +@@ -569,7 +569,6 @@ fini(xlator_t *this) + mem_put(priv); + this->private = NULL; + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + out: + return; + } +diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c +index 164c632..f0247fd 100644 +--- a/xlators/features/sdfs/src/sdfs.c ++++ b/xlators/features/sdfs/src/sdfs.c +@@ -1429,7 +1429,6 @@ void + fini(xlator_t *this) + { + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + return; + } + +diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c +index ce5fc90..58b4c5d 100644 +--- a/xlators/features/selinux/src/selinux.c ++++ b/xlators/features/selinux/src/selinux.c +@@ -256,7 +256,6 @@ out: + GF_FREE(priv); + } + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + } + return ret; + } +@@ -285,7 +284,6 @@ fini(xlator_t *this) + GF_FREE(priv); + + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + + return; + } +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index eb5007b..d668436 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2523,7 +2523,6 @@ out: + GF_FREE(priv); + } + mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; + } + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch b/SOURCES/0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch new file mode 100644 index 0000000..b2a8f4c --- /dev/null +++ 
b/SOURCES/0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch @@ -0,0 +1,228 @@ +From 3ddf12d0710e048878fcf8786d05efe18710c74c Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Fri, 12 Jul 2019 16:44:20 +0530 +Subject: [PATCH 232/255] cluster/afr: Fix incorrect reporting of gfid & type + mismatch + +Backport of: https://review.gluster.org/#/c/glusterfs/+/22908/ + +Problems: +1. When checking for type and gfid mismatch, if the type or gfid +is unknown because of missing gfid handle and the gfid xattr +it will be reported as type or gfid mismatch and the heal will +not complete. + +2. If the source selected during entry heal has null gfid the same +will be sent to afr_lookup_and_heal_gfid(). In this function when +we try to assign the gfid on the bricks where it does not exist, +we are considering the same gfid and try to assign that on those +bricks. This will fail in posix_gfid_set() since the gfid sent +is null. + +Fix: +If the gfid sent to afr_lookup_and_heal_gfid() is null choose a +valid gfid before proceeding to assign the gfid on the bricks +where it is missing. + +In afr_selfheal_detect_gfid_and_type_mismatch(), do not report +type/gfid mismatch if the type/gfid is unknown or not set. + +Change-Id: Icdb4967c09a48e0a3a64ce4948d5fb0a06d7a7af +fixes: bz#1715447 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175966 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bug-1722507-type-mismatch-error-handling.t | 116 +++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-common.c | 12 ++- + xlators/cluster/afr/src/afr-self-heal-entry.c | 13 +++ + 3 files changed, 139 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t + +diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +new file mode 100644 +index 0000000..0aeaaaf +--- /dev/null ++++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +@@ -0,0 +1,116 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++ ++########################################################################################## ++# GFID link file and the GFID is missing on one brick and all the bricks are being blamed. ++ ++TEST touch $M0/dir/file ++#TEST kill_brick $V0 $H0 $B0/$V0"1" ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on one brick. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++rm -f $M0/dir/file ++ ++ ++########################################################################################### ++# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed. ++ ++TEST $CLI volume heal $V0 disable ++TEST touch $M0/dir/file ++#TEST kill_brick $V0 $H0 $B0/$V0"1" ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on two bricks. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file ++TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. 
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 5157e7d..b38085a 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -55,7 +55,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name, + for (i = 0; i < priv->child_count; i++) { + if (source == -1) { + /* case (a) above. */ +- if (replies[i].valid && replies[i].op_ret == 0) { ++ if (replies[i].valid && replies[i].op_ret == 0 && ++ replies[i].poststat.ia_type != IA_INVAL) { + ia_type = replies[i].poststat.ia_type; + break; + } +@@ -63,7 +64,8 @@ afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name, + /* case (b) above. */ + if (i == source) + continue; +- if (sources[i] && replies[i].valid && replies[i].op_ret == 0) { ++ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 && ++ replies[i].poststat.ia_type != IA_INVAL) { + ia_type = replies[i].poststat.ia_type; + break; + } +@@ -77,6 +79,12 @@ heal: + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; ++ ++ if (gf_uuid_is_null(gfid) && ++ !gf_uuid_is_null(replies[i].poststat.ia_gfid) && ++ replies[i].poststat.ia_type == ia_type) ++ gfid = replies[i].poststat.ia_gfid; ++ + if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) || + replies[i].poststat.ia_type != ia_type) + continue; +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index a6890fa..e07b521 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -246,6 +246,19 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + if (replies[i].op_ret != 0) + continue; + ++ if (gf_uuid_is_null(replies[i].poststat.ia_gfid)) ++ continue; ++ ++ if (replies[i].poststat.ia_type == IA_INVAL) ++ continue; ++ ++ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) { ++ src_idx = i; ++ ia_type = replies[src_idx].poststat.ia_type; ++ gfid = &replies[src_idx].poststat.ia_gfid; ++ continue; ++ } ++ + if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) && + (ia_type == replies[i].poststat.ia_type)) { + ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, +-- +1.8.3.1 + diff --git a/SOURCES/0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch b/SOURCES/0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch new file mode 100644 index 0000000..d8e6933 --- /dev/null +++ b/SOURCES/0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch @@ -0,0 +1,78 @@ +From 5c85ce7363b658bc8fa643742626109efe3ade0c Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:44:04 +0530 +Subject: [PATCH 233/255] Revert "graph/shd: Use glusterfs_graph_deactivate to + free the xl rec" + +This reverts commit 8cc6d8af00303c445b94715c92fe9e3e01edb867. 
+ +BUG: 1471742 +Change-Id: Ib90fe89b85f4143db29702338decec76c83872bc +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175942 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/graph.c | 2 +- + libglusterfs/src/xlator.c | 9 +-------- + xlators/features/shard/src/shard.c | 3 --- + 3 files changed, 2 insertions(+), 12 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 5b95fd6..27d9335 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1394,7 +1394,7 @@ glusterfs_graph_cleanup(void *arg) + + pthread_mutex_lock(&ctx->cleanup_lock); + { +- glusterfs_graph_deactivate(graph); ++ glusterfs_graph_fini(graph); + glusterfs_graph_destroy(graph); + } + pthread_mutex_unlock(&ctx->cleanup_lock); +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index d9d3441..71e1ed4 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -659,7 +659,6 @@ xlator_fini_rec(xlator_t *xl) + trav = trav->next; + } + +- xl->cleanup_starting = 1; + if (xl->init_succeeded) { + if (xl->fini) { + old_THIS = THIS; +@@ -667,14 +666,8 @@ xlator_fini_rec(xlator_t *xl) + + xl->fini(xl); + +- if (xl->local_pool) { ++ if (xl->local_pool) + mem_pool_destroy(xl->local_pool); +- xl->local_pool = NULL; +- } +- if (xl->itable) { +- inode_table_destroy(xl->itable); +- xl->itable = NULL; +- } + + THIS = old_THIS; + } else { +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 31c7eec..b248767 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -6785,9 +6785,6 @@ fini(xlator_t *this) + + GF_VALIDATE_OR_GOTO("shard", this, out); + +- /*Itable was not created by shard, hence setting to NULL.*/ +- this->itable = NULL; +- + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; + +-- +1.8.3.1 + diff --git a/SOURCES/0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch b/SOURCES/0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch new file mode 100644 index 0000000..790d9d1 --- /dev/null +++ b/SOURCES/0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch @@ -0,0 +1,220 @@ +From feeee9a35c1219b2077ea07b6fd80976960bd181 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:44:42 +0530 +Subject: [PATCH 234/255] Revert "glusterd/shd: Change shd logfile to a unique + name" + +This reverts commit 541e1400ecaec5fea0f56e8ca18f00c229906d8a. 
+ +BUG: 1471742 +Change-Id: I7e0371d77db6897981f7364c04d4b9b523b865ba +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175943 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 12 ++++++++ + .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 6 ++++ + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ++++----- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 34 +++++----------------- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 4 --- + 6 files changed, 34 insertions(+), 40 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +index 57ceda9..9196758 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +@@ -75,6 +75,18 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + } + + void ++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len) ++{ ++ snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname); ++} ++ ++void ++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len) ++{ ++ snprintf(logfile, len, "%s/shd.log", logdir); ++} ++ ++void + glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) + { + glusterd_svc_proc_t *svc_proc = NULL; +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +index 59466ec..c70702c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +@@ -27,6 +27,12 @@ glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, + int path_len); + + void ++glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len); ++ ++void ++glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len); ++ ++void + glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd); + + int +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 8ad90a9..dbe2560 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -90,8 +90,8 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); + glusterd_svc_create_rundir(rundir); + +- glusterd_svc_build_logfile_path(shd_svc_name, DEFAULT_LOG_FILE_DIRECTORY, +- logfile, sizeof(logfile)); ++ glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir)); ++ glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile)); + + /* Initialize the connection mgmt */ + if (mux_conn && mux_svc->rpc) { +@@ -104,7 +104,7 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + if (ret < 0) + goto out; + } else { +- ret = mkdir_p(DEFAULT_LOG_FILE_DIRECTORY, 0755, _gf_true); ++ ret = mkdir_p(logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { + gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, + "Unable to create logdir %s", logdir); +@@ -460,7 +460,6 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + return -1; + + glusterd_volinfo_ref(volinfo); +- + if (!svc->inited) { + ret = glusterd_shd_svc_mux_init(volinfo, svc); + if (ret) +@@ -472,11 +471,12 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + /* 
Unref will happen from glusterd_svc_attach_cbk */ + ret = glusterd_attach_svc(svc, volinfo, flags); + if (ret) { ++ glusterd_volinfo_unref(volinfo); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to attach shd svc(volume=%s) to pid=%d", ++ "Failed to attach shd svc(volume=%s) to pid=%d. Starting" ++ "a new process", + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- glusterd_shd_svcproc_cleanup(&volinfo->shd); +- glusterd_volinfo_unref(volinfo); ++ ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags); + } + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 400826f..a6e662f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -469,9 +469,6 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + glusterd_conf_t *conf = NULL; + glusterd_svc_t *parent_svc = NULL; + int pid = -1; +- char pidfile[PATH_MAX] = { +- 0, +- }; + + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); +@@ -481,26 +478,8 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + + pthread_mutex_lock(&conf->attach_lock); + { +- if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) { +- /* This is the case when shd process was abnormally killed */ +- pthread_mutex_unlock(&conf->attach_lock); +- glusterd_shd_svcproc_cleanup(&volinfo->shd); +- pthread_mutex_lock(&conf->attach_lock); +- } +- + if (!svc->inited) { +- glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile)); +- ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s", +- "glustershd"); +- if (ret < 0) +- goto unlock; +- +- ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s", +- pidfile); +- if (ret < 0) +- goto unlock; +- +- if (gf_is_service_running(pidfile, &pid)) { ++ if (gf_is_service_running(svc->proc.pidfile, &pid)) { + /* Just connect is required, but we don't know what happens + * during the disconnect. So better to reattach. + */ +@@ -508,10 +487,10 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + } + + if (!mux_proc) { +- if (pid != -1 && sys_access(pidfile, R_OK) == 0) { ++ if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { + /* stale pid file, stop and unlink it */ + glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE); +- glusterd_unlink_file(pidfile); ++ glusterd_unlink_file(svc->proc.pidfile); + } + mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); + } +@@ -705,10 +684,11 @@ glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, + volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + } else { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, +- "svc %s of volume %s failed to attach to pid %d", svc->name, +- volinfo->volname, glusterd_proc_get_pid(&svc->proc)); ++ "svc %s of volume %s failed to " ++ "attach to pid %d. 
Starting a new process", ++ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); + if (!strcmp(svc->name, "glustershd")) { +- glusterd_shd_svcproc_cleanup(&volinfo->shd); ++ glusterd_recover_shd_attach_failure(volinfo, svc, *flag); + } + } + out: +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +index fa316a6..f32dafc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +@@ -33,14 +33,14 @@ glusterd_svc_create_rundir(char *rundir) + return ret; + } + +-void ++static void + glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile, + size_t len) + { + snprintf(logfile, len, "%s/%s.log", logdir, server); + } + +-void ++static void + glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len) + { + snprintf(volfileid, len, "gluster/%s", server); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +index 5a5466a..fbc5225 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +@@ -74,10 +74,6 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + size_t len); + + void +-glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile, +- size_t len); +- +-void + glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len); + + void +-- +1.8.3.1 + diff --git a/SOURCES/0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch b/SOURCES/0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch new file mode 100644 index 0000000..67348f6 --- /dev/null +++ b/SOURCES/0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch @@ -0,0 +1,38 @@ +From b2040d8404e0ac44742cb903e3c8da2c832b2925 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:45:11 +0530 +Subject: [PATCH 235/255] Revert "glusterd/svc: Stop stale process using the + glusterd_proc_stop" + +This reverts commit fe9159ee42f0f67b01e6a495df8105ea0f66738d. + +BUG: 1471742 +Change-Id: Id5ac0d21319724141ad9bcb9b66435803ebe5f47 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175944 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index a6e662f..6a3ca52 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -488,9 +488,9 @@ glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) + + if (!mux_proc) { + if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { +- /* stale pid file, stop and unlink it */ +- glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE); +- glusterd_unlink_file(svc->proc.pidfile); ++ /* stale pid file, unlink it. 
*/ ++ kill(pid, SIGTERM); ++ sys_unlink(svc->proc.pidfile); + } + mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); + } +-- +1.8.3.1 + diff --git a/SOURCES/0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch b/SOURCES/0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch new file mode 100644 index 0000000..e33c7dd --- /dev/null +++ b/SOURCES/0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch @@ -0,0 +1,35 @@ +From 030b5681d47268c591a72035d5a2419234bd1f5f Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:44:55 +0530 +Subject: [PATCH 236/255] Revert "shd/mux: Fix race between mux_proc unlink and + stop" + +This reverts commit e386fb4f4baf834e6a8fc25cc2fbbb17eb0a7a56. + +BUG: 1471742 +Change-Id: I6c52835981389fc5bfeb43483feb581ad8507990 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175945 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index dbe2560..d81d760 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -694,9 +694,6 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) + gf_is_service_running(svc->proc.pidfile, &pid); + cds_list_del_init(&svc->mux_svc); + empty = cds_list_empty(&svc_proc->svcs); +- if (empty) { +- cds_list_del_init(&svc_proc->svc_proc_list); +- } + } + pthread_mutex_unlock(&conf->attach_lock); + if (empty) { +-- +1.8.3.1 + diff --git a/SOURCES/0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch b/SOURCES/0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch new file mode 100644 index 0000000..6c88d6a --- /dev/null +++ b/SOURCES/0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch @@ -0,0 +1,227 @@ +From f0c3af09fd919e3646aae2821b0d6bfe4e2fd89c Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:45:58 +0530 +Subject: [PATCH 237/255] Revert "ec/fini: Fix race between xlator cleanup and + on going async fop" + +This reverts commit 9fd966aa6879ac9867381629f82eca24b950d731. 
+ +BUG: 1471742 +Change-Id: I557ec138174b01d8b8f8d090acd34c179e2c632d +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175946 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 10 ---------- + xlators/cluster/ec/src/ec-common.h | 2 -- + xlators/cluster/ec/src/ec-data.c | 4 +--- + xlators/cluster/ec/src/ec-heal.c | 17 ++--------------- + xlators/cluster/ec/src/ec-types.h | 1 - + xlators/cluster/ec/src/ec.c | 37 ++++++++++++------------------------- + 6 files changed, 15 insertions(+), 56 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 35c2256..e2e582f 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2956,13 +2956,3 @@ ec_manager(ec_fop_data_t *fop, int32_t error) + + __ec_manager(fop, error); + } +- +-gf_boolean_t +-__ec_is_last_fop(ec_t *ec) +-{ +- if ((list_empty(&ec->pending_fops)) && +- (GF_ATOMIC_GET(ec->async_fop_count) == 0)) { +- return _gf_true; +- } +- return _gf_false; +-} +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index bf6c97d..e948342 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -204,6 +204,4 @@ void + ec_reset_entry_healing(ec_fop_data_t *fop); + char * + ec_msg_str(ec_fop_data_t *fop); +-gf_boolean_t +-__ec_is_last_fop(ec_t *ec); + #endif /* __EC_COMMON_H__ */ +diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c +index 8d2d9a1..6ef9340 100644 +--- a/xlators/cluster/ec/src/ec-data.c ++++ b/xlators/cluster/ec/src/ec-data.c +@@ -202,13 +202,11 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify) + { + ec_t *ec = fop->xl->private; + +- *notify = _gf_false; +- + if (!list_empty(&fop->pending_list)) { + LOCK(&ec->lock); + { + list_del_init(&fop->pending_list); +- *notify = __ec_is_last_fop(ec); ++ *notify = list_empty(&ec->pending_fops); + } + UNLOCK(&ec->lock); + } +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 237fea2..8844c29 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2814,20 +2814,8 @@ int + ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque) + { + ec_t *ec = opaque; +- gf_boolean_t last_fop = _gf_false; + +- if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) { +- LOCK(&ec->lock); +- { +- last_fop = __ec_is_last_fop(ec); +- } +- UNLOCK(&ec->lock); +- } + gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret); +- +- if (last_fop) +- ec_pending_fops_completed(ec); +- + return 0; + } + +@@ -2881,15 +2869,14 @@ ec_launch_replace_heal(ec_t *ec) + { + int ret = -1; + ++ if (!ec) ++ return ret; + ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap, + ec_replace_heal_done, NULL, ec); +- + if (ret < 0) { + gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d", + ret); +- ec_replace_heal_done(-1, NULL, ec); + } +- + return ret; + } + +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 4dbf4a3..1c295c0 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -643,7 +643,6 @@ struct _ec { + uintptr_t xl_notify; /* Bit flag representing + notification for bricks. 
*/ + uintptr_t node_mask; +- gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */ + xlator_t **xl_list; + gf_lock_t lock; + gf_timer_t *timer; +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index f0d58c0..df5912c 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -355,7 +355,6 @@ ec_notify_cbk(void *data) + ec_t *ec = data; + glusterfs_event_t event = GF_EVENT_MAXVAL; + gf_boolean_t propagate = _gf_false; +- gf_boolean_t launch_heal = _gf_false; + + LOCK(&ec->lock); + { +@@ -385,11 +384,6 @@ ec_notify_cbk(void *data) + * still bricks DOWN, they will be healed when they + * come up. */ + ec_up(ec->xl, ec); +- +- if (ec->shd.iamshd && !ec->shutdown) { +- launch_heal = _gf_true; +- GF_ATOMIC_INC(ec->async_fop_count); +- } + } + + propagate = _gf_true; +@@ -397,12 +391,13 @@ ec_notify_cbk(void *data) + unlock: + UNLOCK(&ec->lock); + +- if (launch_heal) { +- /* We have just brought the volume UP, so we trigger +- * a self-heal check on the root directory. */ +- ec_launch_replace_heal(ec); +- } + if (propagate) { ++ if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) { ++ /* We have just brought the volume UP, so we trigger ++ * a self-heal check on the root directory. */ ++ ec_launch_replace_heal(ec); ++ } ++ + default_notify(ec->xl, event, NULL); + } + } +@@ -430,7 +425,7 @@ ec_disable_delays(ec_t *ec) + { + ec->shutdown = _gf_true; + +- return __ec_is_last_fop(ec); ++ return list_empty(&ec->pending_fops); + } + + void +@@ -608,10 +603,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + if (event == GF_EVENT_CHILD_UP) { + /* We need to trigger a selfheal if a brick changes + * to UP state. */ +- if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd && +- !ec->shutdown) { +- needs_shd_check = _gf_true; +- } ++ needs_shd_check = ec_set_up_state(ec, mask, mask); + } else if (event == GF_EVENT_CHILD_DOWN) { + ec_set_up_state(ec, mask, 0); + } +@@ -641,21 +633,17 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + } + } else { + propagate = _gf_false; +- needs_shd_check = _gf_false; +- } +- +- if (needs_shd_check) { +- GF_ATOMIC_INC(ec->async_fop_count); + } + } + unlock: + UNLOCK(&ec->lock); + + done: +- if (needs_shd_check) { +- ec_launch_replace_heal(ec); +- } + if (propagate) { ++ if (needs_shd_check && ec->shd.iamshd) { ++ ec_launch_replace_heal(ec); ++ } ++ + error = default_notify(this, event, data); + } + +@@ -717,7 +705,6 @@ init(xlator_t *this) + ec->xl = this; + LOCK_INIT(&ec->lock); + +- GF_ATOMIC_INIT(ec->async_fop_count, 0); + INIT_LIST_HEAD(&ec->pending_fops); + INIT_LIST_HEAD(&ec->heal_waiting); + INIT_LIST_HEAD(&ec->healing); +-- +1.8.3.1 + diff --git a/SOURCES/0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch b/SOURCES/0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch new file mode 100644 index 0000000..0514cd1 --- /dev/null +++ b/SOURCES/0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch @@ -0,0 +1,47 @@ +From 96072cea4da1c2ba5bd87307f20b3ee2cbe6f63d Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:46:10 +0530 +Subject: [PATCH 238/255] Revert "xlator/log: Add more logging in + xlator_is_cleanup_starting" + +This reverts commit 9b94397a5a735910fab2a29670146a1feb6d890e. 
+ +BUG: 1471742 +Change-Id: Icc3f0c8741ed780e265202adbb009063f657c7f7 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175947 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/xlator.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 71e1ed4..fbfbbe2 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1494,18 +1494,12 @@ xlator_is_cleanup_starting(xlator_t *this) + glusterfs_graph_t *graph = NULL; + xlator_t *xl = NULL; + +- if (!this) { +- gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "xlator object is null, returning false"); ++ if (!this) + goto out; +- } +- + graph = this->graph; +- if (!graph) { +- gf_msg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "Graph is not set for xlator %s", this->name); ++ ++ if (!graph) + goto out; +- } + + xl = graph->first; + if (xl && xl->cleanup_starting) +-- +1.8.3.1 + diff --git a/SOURCES/0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch b/SOURCES/0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch new file mode 100644 index 0000000..f36c997 --- /dev/null +++ b/SOURCES/0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch @@ -0,0 +1,128 @@ +From ad40c0783e84e5e54a83aeb20a52f720cc881b0c Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:46:22 +0530 +Subject: [PATCH 239/255] Revert "ec/fini: Fix race with ec_fini and ec_notify" + +This reverts commit 998d9b8b5e271f407e1c654c34f45f0db36abc71. + +BUG: 1471742 +Change-Id: Ifccb8a22d9ef96c22b32dcb4b82bf4d21cf85484 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175948 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 3 --- + libglusterfs/src/libglusterfs.sym | 1 - + libglusterfs/src/xlator.c | 21 --------------------- + xlators/cluster/ec/src/ec-heal.c | 4 ---- + xlators/cluster/ec/src/ec-heald.c | 6 ------ + xlators/cluster/ec/src/ec.c | 3 --- + 6 files changed, 38 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 09e463e..8998976 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1092,7 +1092,4 @@ gluster_graph_take_reference(xlator_t *tree); + + gf_boolean_t + mgmt_is_multiplexed_daemon(char *name); +- +-gf_boolean_t +-xlator_is_cleanup_starting(xlator_t *this); + #endif /* _XLATOR_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 86215d2..05f93b4 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1160,4 +1160,3 @@ glusterfs_process_svc_attach_volfp + glusterfs_mux_volfile_reconfigure + glusterfs_process_svc_detach + mgmt_is_multiplexed_daemon +-xlator_is_cleanup_starting +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index fbfbbe2..022c3ed 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1486,24 +1486,3 @@ mgmt_is_multiplexed_daemon(char *name) + } + return _gf_false; + } +- +-gf_boolean_t +-xlator_is_cleanup_starting(xlator_t *this) +-{ +- gf_boolean_t cleanup = _gf_false; +- glusterfs_graph_t *graph = NULL; +- 
xlator_t *xl = NULL; +- +- if (!this) +- goto out; +- graph = this->graph; +- +- if (!graph) +- goto out; +- +- xl = graph->first; +- if (xl && xl->cleanup_starting) +- cleanup = _gf_true; +-out: +- return cleanup; +-} +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 8844c29..2fa1f11 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2855,10 +2855,6 @@ ec_replace_brick_heal_wrap(void *opaque) + itable = ec->xl->itable; + else + goto out; +- +- if (xlator_is_cleanup_starting(ec->xl)) +- goto out; +- + ret = ec_replace_heal(ec, itable->root); + out: + return ret; +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index 91512d7..edf5e11 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -444,9 +444,6 @@ unlock: + int + ec_shd_full_healer_spawn(xlator_t *this, int subvol) + { +- if (xlator_is_cleanup_starting(this)) +- return -1; +- + return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol), + ec_shd_full_healer); + } +@@ -454,9 +451,6 @@ ec_shd_full_healer_spawn(xlator_t *this, int subvol) + int + ec_shd_index_healer_spawn(xlator_t *this, int subvol) + { +- if (xlator_is_cleanup_starting(this)) +- return -1; +- + return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol), + ec_shd_index_healer); + } +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index df5912c..264582a 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -486,9 +486,6 @@ ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state) + { + uintptr_t current_state = 0; + +- if (xlator_is_cleanup_starting(ec->xl)) +- return _gf_false; +- + if ((ec->xl_notify & index_mask) == 0) { + ec->xl_notify |= index_mask; + ec->xl_notify_count++; +-- +1.8.3.1 + diff --git a/SOURCES/0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch b/SOURCES/0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch new file mode 100644 index 0000000..54ef75e --- /dev/null +++ b/SOURCES/0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch @@ -0,0 +1,54 @@ +From 9b3adb28207681f49ea97fc2c473634ff0f73db6 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:46:35 +0530 +Subject: [PATCH 240/255] Revert "glusterd/shd: Optimize the glustershd manager + to send reconfigure" + +This reverts commit 321080e55f0ae97115a9542ba5de8494e7610860. 
+ +BUG: 1471742 +Change-Id: I5fa84baa3c3e72ca8eb605c7f1fafb53c68859f9 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175949 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/graph.c | 1 + + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 9 ++++----- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 27d9335..18fb2d9 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1497,6 +1497,7 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); + parent_graph->xl_count -= graph->xl_count; + parent_graph->leaf_count -= graph->leaf_count; ++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl); + parent_graph->id++; + ret = 0; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index d81d760..981cc87 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -311,11 +311,10 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + */ + ret = svc->stop(svc, SIGTERM); + } else if (volinfo) { +- if (volinfo->status != GLUSTERD_STATUS_STARTED) { +- ret = svc->stop(svc, SIGTERM); +- if (ret) +- goto out; +- } ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) ++ goto out; ++ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = svc->start(svc, flags); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch b/SOURCES/0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch new file mode 100644 index 0000000..965fcfe --- /dev/null +++ b/SOURCES/0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch @@ -0,0 +1,82 @@ +From 066189add979d2e4c74463592e5021bd060d5a51 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:46:47 +0530 +Subject: [PATCH 241/255] Revert "glusterd/svc: glusterd_svcs_stop should call + individual wrapper function" + +This reverts commit 79fff98f9ca5f815cf0227312b9a997d555dad29. + +BUG: 1471742 +Change-Id: I258040ed9be6bc3b4498c76ed51d59258c55acff +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175950 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 12 ++---------- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 10 +++++----- + 2 files changed, 7 insertions(+), 15 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 981cc87..75f9a07 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -656,18 +656,10 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) + int pid = -1; + + conf = THIS->private; +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + svc_proc = svc->svc_proc; +- if (!svc_proc) { +- /* +- * This can happen when stop was called on a volume that is not shd +- * compatible. 
+- */ +- gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped"); +- ret = 0; +- goto out; +- } ++ GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out); ++ GF_VALIDATE_OR_GOTO("glusterd", conf, out); + + /* Get volinfo->shd from svc object */ + shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 6a3ca52..f7be394 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -86,25 +86,25 @@ glusterd_svcs_stop(glusterd_volinfo_t *volinfo) + priv = this->private; + GF_ASSERT(priv); + +- ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL); ++ ret = glusterd_svc_stop(&(priv->nfs_svc), SIGKILL); + if (ret) + goto out; + +- ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM); ++ ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); + if (ret) + goto out; + + if (volinfo) { +- ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM); ++ ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT); + if (ret) + goto out; + } + +- ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM); ++ ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM); + if (ret) + goto out; + +- ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM); ++ ret = glusterd_svc_stop(&(priv->scrub_svc), SIGTERM); + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch b/SOURCES/0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch new file mode 100644 index 0000000..2174063 --- /dev/null +++ b/SOURCES/0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch @@ -0,0 +1,427 @@ +From 48f7be493588fdf5e99dff0c3b91327e07da05f3 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:48:34 +0530 +Subject: [PATCH 242/255] Revert "tests/shd: Add test coverage for shd mux" + +This reverts commit b7f832288d2d2e57231d90765afc049ad7cb2f9d. + +BUG: 1471742 +Change-Id: Ifccac5150f07b98006714e43c77c5a4b1fd38cb8 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175951 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/glusterd-restart-shd-mux.t | 96 --------------------- + tests/basic/shd-mux.t | 149 --------------------------------- + tests/basic/volume-scale-shd-mux.t | 112 ------------------------- + tests/volume.rc | 15 ---- + 4 files changed, 372 deletions(-) + delete mode 100644 tests/basic/glusterd-restart-shd-mux.t + delete mode 100644 tests/basic/shd-mux.t + delete mode 100644 tests/basic/volume-scale-shd-mux.t + +diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t +deleted file mode 100644 +index a50af9d..0000000 +--- a/tests/basic/glusterd-restart-shd-mux.t ++++ /dev/null +@@ -1,96 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../include.rc +-. 
$(dirname $0)/../volume.rc +- +-cleanup; +- +-TESTS_EXPECTED_IN_LOOP=20 +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} +-TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 performance.flush-behind off +-TEST $CLI volume start $V0 +- +-for i in $(seq 1 3); do +- TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} +- TEST $CLI volume start ${V0}_afr$i +- TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} +- TEST $CLI volume start ${V0}_ec$i +-done +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +- +-#Stop the glusterd +-TEST pkill glusterd +-#Only stopping glusterd, so there will be one shd +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count +-TEST glusterd +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-#Check the thread count become to number of volumes*number of ec subvolume (3*6=18) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +-#Check the thread count become to number of volumes*number of afr subvolume (4*6=24) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-shd_pid=$(get_shd_mux_pid $V0) +-for i in $(seq 1 3); do +- afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid" +- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path +- ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid" +- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path +-done +- +-#Reboot a node scenario +-TEST pkill gluster +-#Only stopped glusterd, so there will be one shd +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count +- +-TEST glusterd +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +- +-#Check the thread count become to number of volumes*number of ec subvolume (3*6=18) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +-#Check the thread count become to number of volumes*number of afr subvolume (4*6=24) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-shd_pid=$(get_shd_mux_pid $V0) +-for i in $(seq 1 3); do +- afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid" +- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path +- ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid" +- EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path +-done +- +-for i in $(seq 1 3); do +- TEST $CLI volume stop ${V0}_afr$i +- TEST $CLI volume stop ${V0}_ec$i +-done +- +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST kill_brick $V0 $H0 $B0/${V0}3 +- +-TEST touch $M0/foo{1..100} +- +-EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0 +- +-TEST $CLI volume start ${V0} force +- +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +- +-TEST rm -rf $M0/* +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +- +- +-TEST $CLI volume stop ${V0} +-TEST $CLI volume delete ${V0} +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count +- +-cleanup +diff --git a/tests/basic/shd-mux.t b/tests/basic/shd-mux.t +deleted file mode 100644 +index e42a34a..0000000 +--- a/tests/basic/shd-mux.t ++++ /dev/null +@@ -1,149 +0,0 @@ +-#!/bin/bash +- +-. 
$(dirname $0)/../include.rc +-. $(dirname $0)/../volume.rc +- +-cleanup; +- +-TESTS_EXPECTED_IN_LOOP=16 +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} +-TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 performance.flush-behind off +-TEST $CLI volume start $V0 +-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-shd_pid=$(get_shd_mux_pid $V0) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-#Create a one more volume +-TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5} +-TEST $CLI volume start ${V0}_1 +- +-#Check whether the shd has multiplexed or not +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0} +- +-TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0 +-TEST $CLI volume set ${V0}_1 cluster.eager-lock off +-TEST $CLI volume set ${V0}_1 performance.flush-behind off +-TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1 +- +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST kill_brick $V0 $H0 $B0/${V0}4 +-TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10 +-TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14 +- +-TEST touch $M0/foo{1..100} +-TEST touch $M1/foo{1..100} +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1 +- +-TEST $CLI volume start ${V0} force +-TEST $CLI volume start ${V0}_1 force +- +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1 +- +-TEST rm -rf $M0/* +-TEST rm -rf $M1/* +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 +- +-#Stop the volume +-TEST $CLI volume stop ${V0}_1 +-TEST $CLI volume delete ${V0}_1 +- +-#Check the stop succeeded and detached the volume with out restarting it +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0 +- +-#Check the thread count become to earlier number after stopping +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +- +-#Now create a ec volume and check mux works +-TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5} +-TEST $CLI volume start ${V0}_2 +- +-#Check whether the shd has multiplexed or not +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0} +- +-TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0 +-TEST $CLI volume set ${V0}_2 cluster.eager-lock off +-TEST $CLI volume set ${V0}_2 performance.flush-behind off +-TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1 +- +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST kill_brick $V0 $H0 $B0/${V0}4 +-TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20 +-TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22 +- +-TEST touch $M0/foo{1..100} +-TEST touch $M1/foo{1..100} +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2 +- +-TEST $CLI volume start ${V0} force +-TEST $CLI volume start ${V0}_2 force +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" 
number_healer_threads_shd $V0 "__ec_shd_healer_wait" +- +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2 +- +-TEST rm -rf $M0/* +-TEST rm -rf $M1/* +- +- +-#Stop the volume +-TEST $CLI volume stop ${V0}_2 +-TEST $CLI volume delete ${V0}_2 +- +-#Check the stop succeeded and detached the volume with out restarting it +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0 +- +-#Check the thread count become to zero for ec related threads +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +-#Check the thread count become to earlier number after stopping +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-for i in $(seq 1 3); do +- TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} +- TEST $CLI volume start ${V0}_afr$i +- TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} +- TEST $CLI volume start ${V0}_ec$i +-done +- +-#Check the thread count become to number of volumes*number of ec subvolume (3*6=18) +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +-#Check the thread count become to number of volumes*number of afr subvolume (4*6=24) +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +-#Delete the volumes +-for i in $(seq 1 3); do +- TEST $CLI volume stop ${V0}_afr$i +- TEST $CLI volume stop ${V0}_ec$i +- TEST $CLI volume delete ${V0}_afr$i +- TEST $CLI volume delete ${V0}_ec$i +-done +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +- +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-TEST $CLI volume stop ${V0} +-TEST $CLI volume delete ${V0} +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count +- +-cleanup +diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t +deleted file mode 100644 +index dd9cf83..0000000 +--- a/tests/basic/volume-scale-shd-mux.t ++++ /dev/null +@@ -1,112 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../include.rc +-. 
$(dirname $0)/../volume.rc +- +-cleanup; +- +-TESTS_EXPECTED_IN_LOOP=6 +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5} +-TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 performance.flush-behind off +-TEST $CLI volume start $V0 +- +-for i in $(seq 1 2); do +- TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5} +- TEST $CLI volume start ${V0}_afr$i +- TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5} +- TEST $CLI volume start ${V0}_ec$i +-done +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-#Check the thread count become to number of volumes*number of ec subvolume (2*6=12) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +-#Check the thread count become to number of volumes*number of afr subvolume (3*6=18) +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8}; +-#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21) +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-#Remove the brick and check the detach is successful +-$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5}; +-#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18) +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +- +-#Remove the brick and check the detach is successful +-$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait" +- +- +-for i in $(seq 1 2); do +- TEST $CLI volume stop ${V0}_afr$i +- TEST $CLI volume stop ${V0}_ec$i +-done +- +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-TEST kill_brick $V0 $H0 $B0/${V0}0 +-TEST kill_brick $V0 $H0 $B0/${V0}4 +- +-TEST touch $M0/foo{1..100} +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0 +- +-TEST $CLI volume start ${V0} force +- +-EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +- +-TEST rm -rf $M0/* +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +-shd_pid=$(get_shd_mux_pid $V0) +-TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10 +-TEST $CLI volume start ${V0}_distribute1 +- +-#Creating a non-replicate/non-ec volume should not have any effect in shd +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +-EXPECT "^${shd_pid}$" get_shd_mux_pid $V0 +- +-TEST mkdir $B0/add/ +-#Now convert the distributed volume to replicate +-TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3} +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 
"__afr_shd_healer_wait" +- +-#scale down the volume +-TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait" +- +-TEST $CLI volume stop ${V0} +-TEST $CLI volume delete ${V0} +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count +- +-TEST rm -rf $B0/add/ +-TEST mkdir $B0/add/ +-#Now convert the distributed volume back to replicate and make sure that a new shd is spawned +-TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}; +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count +-EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "__afr_shd_healer_wait" +- +-#Now convert the replica volume to distribute again and make sure the shd is now stopped +-TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force +-TEST rm -rf $B0/add/ +- +-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count +- +-cleanup +diff --git a/tests/volume.rc b/tests/volume.rc +index 6a78c37..022d972 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -913,21 +913,6 @@ function volgen_check_ancestry { + fi + } + +-function get_shd_mux_pid { +- local volume=$1 +- pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'` +- echo $pid +-} +- +-function shd_count { +- ps aux | grep "glustershd" | grep -v grep | wc -l +-} +- +-function number_healer_threads_shd { +- local pid=$(get_shd_mux_pid $1) +- pstack $pid | grep $2 | wc -l +-} +- + function get_mtime { + local time=$(get-mdata-xattr -m $1) + if [ $time == "-1" ]; +-- +1.8.3.1 + diff --git a/SOURCES/0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch b/SOURCES/0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch new file mode 100644 index 0000000..9e918d7 --- /dev/null +++ b/SOURCES/0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch @@ -0,0 +1,154 @@ +From 4d65506ddfa0245dcaa13b14ca13b2ea762df37d Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:48:51 +0530 +Subject: [PATCH 243/255] Revert "glusterfsd/cleanup: Protect graph object + under a lock" + +This reverts commit 11b64d494c52004002f900888694d20ef8af6df6. 
+ +BUG: 1471742 +Change-Id: I2717207d87ad213722de33c24e451502ed4aff48 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175952 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/graph.c | 58 ++++++++++--------------- + libglusterfs/src/statedump.c | 16 ++----- + tests/bugs/glusterd/optimized-basic-testcases.t | 4 +- + 3 files changed, 28 insertions(+), 50 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 18fb2d9..4c8b02d 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1392,12 +1392,8 @@ glusterfs_graph_cleanup(void *arg) + } + pthread_mutex_unlock(&ctx->notify_lock); + +- pthread_mutex_lock(&ctx->cleanup_lock); +- { +- glusterfs_graph_fini(graph); +- glusterfs_graph_destroy(graph); +- } +- pthread_mutex_unlock(&ctx->cleanup_lock); ++ glusterfs_graph_fini(graph); ++ glusterfs_graph_destroy(graph); + out: + return NULL; + } +@@ -1472,37 +1468,31 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + + if (!ctx || !ctx->active || !volfile_obj) + goto out; ++ parent_graph = ctx->active; ++ graph = volfile_obj->graph; ++ if (!graph) ++ goto out; ++ if (graph->first) ++ xl = graph->first; + +- pthread_mutex_lock(&ctx->cleanup_lock); +- { +- parent_graph = ctx->active; +- graph = volfile_obj->graph; +- if (!graph) +- goto unlock; +- if (graph->first) +- xl = graph->first; +- +- last_xl = graph->last_xl; +- if (last_xl) +- last_xl->next = NULL; +- if (!xl || xl->cleanup_starting) +- goto unlock; ++ last_xl = graph->last_xl; ++ if (last_xl) ++ last_xl->next = NULL; ++ if (!xl || xl->cleanup_starting) ++ goto out; + +- xl->cleanup_starting = 1; +- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, +- "detaching child %s", volfile_obj->vol_id); ++ xl->cleanup_starting = 1; ++ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, ++ "detaching child %s", volfile_obj->vol_id); + +- list_del_init(&volfile_obj->volfile_list); +- glusterfs_mux_xlator_unlink(parent_graph->top, xl); +- parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); +- parent_graph->xl_count -= graph->xl_count; +- parent_graph->leaf_count -= graph->leaf_count; +- default_notify(xl, GF_EVENT_PARENT_DOWN, xl); +- parent_graph->id++; +- ret = 0; +- } +-unlock: +- pthread_mutex_unlock(&ctx->cleanup_lock); ++ list_del_init(&volfile_obj->volfile_list); ++ glusterfs_mux_xlator_unlink(parent_graph->top, xl); ++ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); ++ parent_graph->xl_count -= graph->xl_count; ++ parent_graph->leaf_count -= graph->leaf_count; ++ default_notify(xl, GF_EVENT_PARENT_DOWN, xl); ++ parent_graph->id++; ++ ret = 0; + out: + if (!ret) { + list_del_init(&volfile_obj->volfile_list); +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index 0d58f8f..0cf80c0 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -805,17 +805,11 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + int brick_count = 0; + int len = 0; + ++ gf_proc_dump_lock(); ++ + if (!ctx) + goto out; + +- /* +- * Multiplexed daemons can change the active graph when attach/detach +- * is called. So this has to be protected with the cleanup lock. 
+- */ +- if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) +- pthread_mutex_lock(&ctx->cleanup_lock); +- gf_proc_dump_lock(); +- + if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) && + (ctx && ctx->active)) { + top = ctx->active->first; +@@ -929,11 +923,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + out: + GF_FREE(dump_options.dump_path); + dump_options.dump_path = NULL; +- if (ctx) { +- gf_proc_dump_unlock(); +- if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) +- pthread_mutex_unlock(&ctx->cleanup_lock); +- } ++ gf_proc_dump_unlock(); + + return; + } +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index 110f1b9..d700b5e 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -289,9 +289,7 @@ mkdir -p /xyz/var/lib/glusterd/abc + TEST $CLI volume create "test" $H0:/xyz/var/lib/glusterd/abc + EXPECT 'Created' volinfo_field "test" 'Status'; + +-#While taking a statedump, there is a TRY_LOCK on call_frame, which might may cause +-#failure. So Adding a EXPECT_WITHIN +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info ++EXPECT "1" generate_statedump_and_check_for_glusterd_info + + cleanup_statedump `pidof glusterd` + cleanup +-- +1.8.3.1 + diff --git a/SOURCES/0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch b/SOURCES/0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch new file mode 100644 index 0000000..0888021 --- /dev/null +++ b/SOURCES/0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch @@ -0,0 +1,292 @@ +From f6d967cd70ff41a0f93c54d50128c468e9d5dea9 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:49:21 +0530 +Subject: [PATCH 244/255] Revert "ec/shd: Cleanup self heal daemon resources + during ec fini" + +This reverts commit edc238e40060773f5f5fd59fcdad8ae27d65749f. 
+ +BUG: 1471742 +Change-Id: If6cb5941b964f005454a21a67938b354ef1a2037 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175953 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/syncop-utils.c | 2 - + xlators/cluster/afr/src/afr-self-heald.c | 5 --- + xlators/cluster/ec/src/ec-heald.c | 77 +++++--------------------------- + xlators/cluster/ec/src/ec-heald.h | 3 -- + xlators/cluster/ec/src/ec-messages.h | 3 +- + xlators/cluster/ec/src/ec.c | 47 ------------------- + 6 files changed, 13 insertions(+), 124 deletions(-) + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index 4167db4..b842142 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -354,8 +354,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + + if (frame) { + this = frame->this; +- } else { +- this = THIS; + } + + /*For this functionality to be implemented in general, we need +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 522fe5d..8bc4720 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -524,11 +524,6 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + afr_private_t *priv = NULL; + + priv = this->private; +- +- if (this->cleanup_starting) { +- return -ENOTCONN; +- } +- + if (!priv->shd.enabled) + return -EBUSY; + +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index edf5e11..cba111a 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -71,11 +71,6 @@ disabled_loop: + break; + } + +- if (ec->shutdown) { +- healer->running = _gf_false; +- return -1; +- } +- + ret = healer->rerun; + healer->rerun = 0; + +@@ -246,11 +241,9 @@ ec_shd_index_sweep(struct subvol_healer *healer) + goto out; + } + +- _mask_cancellation(); + ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD, + healer, ec_shd_index_heal, xdata, + ec->shd.max_threads, ec->shd.wait_qlength); +- _unmask_cancellation(); + out: + if (xdata) + dict_unref(xdata); +@@ -270,11 +263,6 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + int ret = 0; + + ec = this->private; +- +- if (this->cleanup_starting) { +- return -ENOTCONN; +- } +- + if (ec->xl_up_count <= ec->fragments) { + return -ENOTCONN; + } +@@ -317,15 +305,11 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode) + { + ec_t *ec = NULL; + loc_t loc = {0}; +- int ret = -1; + + ec = healer->this->private; + loc.inode = inode; +- _mask_cancellation(); +- ret = syncop_ftw(ec->xl_list[healer->subvol], &loc, +- GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); +- _unmask_cancellation(); +- return ret; ++ return syncop_ftw(ec->xl_list[healer->subvol], &loc, ++ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal); + } + + void * +@@ -333,16 +317,13 @@ ec_shd_index_healer(void *data) + { + struct subvol_healer *healer = NULL; + xlator_t *this = NULL; +- int run = 0; + + healer = data; + THIS = this = healer->this; + ec_t *ec = this->private; + + for (;;) { +- run = ec_shd_healer_wait(healer); +- if (run == -1) +- break; ++ ec_shd_healer_wait(healer); + + if (ec->xl_up_count > ec->fragments) { + gf_msg_debug(this->name, 0, "starting index sweep on subvol %s", +@@ -371,12 +352,16 @@ ec_shd_full_healer(void *data) + 
+ rootloc.inode = this->itable->root; + for (;;) { +- run = ec_shd_healer_wait(healer); +- if (run < 0) { +- break; +- } else if (run == 0) { +- continue; ++ pthread_mutex_lock(&healer->mutex); ++ { ++ run = __ec_shd_healer_wait(healer); ++ if (!run) ++ healer->running = _gf_false; + } ++ pthread_mutex_unlock(&healer->mutex); ++ ++ if (!run) ++ break; + + if (ec->xl_up_count > ec->fragments) { + gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START, +@@ -577,41 +562,3 @@ out: + dict_del(output, this->name); + return ret; + } +- +-void +-ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer) +-{ +- if (!healer) +- return; +- +- pthread_cond_destroy(&healer->cond); +- pthread_mutex_destroy(&healer->mutex); +-} +- +-void +-ec_selfheal_daemon_fini(xlator_t *this) +-{ +- struct subvol_healer *healer = NULL; +- ec_self_heald_t *shd = NULL; +- ec_t *priv = NULL; +- int i = 0; +- +- priv = this->private; +- if (!priv) +- return; +- +- shd = &priv->shd; +- if (!shd->iamshd) +- return; +- +- for (i = 0; i < priv->nodes; i++) { +- healer = &shd->index_healers[i]; +- ec_destroy_healer_object(this, healer); +- +- healer = &shd->full_healers[i]; +- ec_destroy_healer_object(this, healer); +- } +- +- GF_FREE(shd->index_healers); +- GF_FREE(shd->full_healers); +-} +diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h +index 8184cf4..2eda2a7 100644 +--- a/xlators/cluster/ec/src/ec-heald.h ++++ b/xlators/cluster/ec/src/ec-heald.h +@@ -24,7 +24,4 @@ ec_selfheal_daemon_init(xlator_t *this); + void + ec_shd_index_healer_wake(ec_t *ec); + +-void +-ec_selfheal_daemon_fini(xlator_t *this); +- + #endif /* __EC_HEALD_H__ */ +diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h +index ce299bb..7c28808 100644 +--- a/xlators/cluster/ec/src/ec-messages.h ++++ b/xlators/cluster/ec/src/ec-messages.h +@@ -55,7 +55,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, + EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE, + EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, + EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, +- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, +- EC_MSG_THREAD_CLEANUP_FAILED); ++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED); + + #endif /* !_EC_MESSAGES_H_ */ +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 264582a..3c8013e 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -429,51 +429,6 @@ ec_disable_delays(ec_t *ec) + } + + void +-ec_cleanup_healer_object(ec_t *ec) +-{ +- struct subvol_healer *healer = NULL; +- ec_self_heald_t *shd = NULL; +- void *res = NULL; +- int i = 0; +- gf_boolean_t is_join = _gf_false; +- +- shd = &ec->shd; +- if (!shd->iamshd) +- return; +- +- for (i = 0; i < ec->nodes; i++) { +- healer = &shd->index_healers[i]; +- pthread_mutex_lock(&healer->mutex); +- { +- healer->rerun = 1; +- if (healer->running) { +- pthread_cond_signal(&healer->cond); +- is_join = _gf_true; +- } +- } +- pthread_mutex_unlock(&healer->mutex); +- if (is_join) { +- pthread_join(healer->thread, &res); +- is_join = _gf_false; +- } +- +- healer = &shd->full_healers[i]; +- pthread_mutex_lock(&healer->mutex); +- { +- healer->rerun = 1; +- if (healer->running) { +- pthread_cond_signal(&healer->cond); +- is_join = _gf_true; +- } +- } +- pthread_mutex_unlock(&healer->mutex); +- if (is_join) { +- pthread_join(healer->thread, &res); +- is_join = _gf_false; +- } +- } +-} +-void + ec_pending_fops_completed(ec_t
*ec) + { + if (ec->shutdown) { +@@ -589,7 +544,6 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2) + /* If there aren't pending fops running after we have waken up + * them, we immediately propagate the notification. */ + propagate = ec_disable_delays(ec); +- ec_cleanup_healer_object(ec); + goto unlock; + } + +@@ -805,7 +759,6 @@ failed: + void + fini(xlator_t *this) + { +- ec_selfheal_daemon_fini(this); + __ec_destroy_private(this); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch b/SOURCES/0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch new file mode 100644 index 0000000..6495e38 --- /dev/null +++ b/SOURCES/0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch @@ -0,0 +1,151 @@ +From 022701465f3e642cdb7942995647615baa266a35 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:49:31 +0530 +Subject: [PATCH 245/255] Revert "shd/glusterd: Serialize shd manager to + prevent race condition" + +This reverts commit 646292b4f73bf1b506d034b85787f794963d7196. + +BUG: 1471742 +Change-Id: Ie21fbe18965d8bdea81f4276b57960a27a4db89d +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175954 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../serialize-shd-manager-glusterd-restart.t | 54 ---------------------- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 14 ------ + xlators/mgmt/glusterd/src/glusterd.c | 1 - + xlators/mgmt/glusterd/src/glusterd.h | 3 -- + 4 files changed, 72 deletions(-) + delete mode 100644 tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t + +diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t +deleted file mode 100644 +index 3a27c2a..0000000 +--- a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t ++++ /dev/null +@@ -1,54 +0,0 @@ +-#! /bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../cluster.rc +- +-function check_peers { +-count=`$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l` +-echo $count +-} +- +-function check_shd { +-ps aux | grep $1 | grep glustershd | wc -l +-} +- +-cleanup +- +- +-TEST launch_cluster 6 +- +-TESTS_EXPECTED_IN_LOOP=25 +-for i in $(seq 2 6); do +- hostname="H$i" +- TEST $CLI_1 peer probe ${!hostname} +-done +- +- +-EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers; +-for i in $(seq 1 5); do +- +- TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i +- TEST $CLI_1 volume start ${V0}_$i force +- +-done +- +-#kill a node +-TEST kill_node 3 +- +-TEST $glusterd_3; +-EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers +- +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3 +- +-for i in $(seq 1 5); do +- +- TEST $CLI_1 volume stop ${V0}_$i +- TEST $CLI_1 volume delete ${V0}_$i +- +-done +- +-for i in $(seq 1 6); do +- hostname="H$i" +- EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname} +-done +-cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 75f9a07..a9eab42 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -254,26 +254,14 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + { + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; +- glusterd_conf_t *conf = NULL; +- gf_boolean_t shd_restart = _gf_false; + +- conf = THIS->private; + volinfo = data; +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); + GF_VALIDATE_OR_GOTO("glusterd", svc, out); + GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); + + if (volinfo) + glusterd_volinfo_ref(volinfo); + +- while (conf->restart_shd) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); +- } +- conf->restart_shd = _gf_true; +- shd_restart = _gf_true; +- + ret = glusterd_shdsvc_create_volfile(volinfo); + if (ret) + goto out; +@@ -322,8 +310,6 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + } + } + out: +- if (shd_restart) +- conf->restart_shd = _gf_false; + if (volinfo) + glusterd_volinfo_unref(volinfo); + if (ret) +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index 6d7dd4a..c0973cb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1819,7 +1819,6 @@ init(xlator_t *this) + conf->rpc = rpc; + conf->uds_rpc = uds_rpc; + conf->gfs_mgmt = &gd_brick_prog; +- conf->restart_shd = _gf_false; + this->private = conf; + /* conf->workdir and conf->rundir are smaller than PATH_MAX; gcc's + * snprintf checking will throw an error here if sprintf is used. 
+diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 7d07d33..0fbc9dd 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -222,9 +222,6 @@ typedef struct { + gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; +- gf_boolean_t restart_shd; /* This flag prevents running two shd manager +- simultaneously +- */ + pthread_mutex_t attach_lock; /* Lock can be per process or a common one */ + pthread_mutex_t volume_lock; /* We release the big_lock from lot of places + which might lead the modification of volinfo +-- +1.8.3.1 + diff --git a/SOURCES/0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch b/SOURCES/0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch new file mode 100644 index 0000000..bbc780c --- /dev/null +++ b/SOURCES/0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch @@ -0,0 +1,53 @@ +From bc5e3967864d6f6ea22deb22ba72aedca8367797 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:49:41 +0530 +Subject: [PATCH 246/255] Revert "glusterd/shd: Keep a ref on volinfo until + attach rpc execute cbk" + +This reverts commit c429d3c63601e6ea15af76aa684c30bbeb746467. + +BUG: 1471742 +Change-Id: I614e8bdbcc5111dbf407aba047e7d2284bef8ac8 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175955 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 3 --- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 ---- + 2 files changed, 7 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index a9eab42..19eca9f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -452,11 +452,8 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + } + + if (shd->attached) { +- glusterd_volinfo_ref(volinfo); +- /* Unref will happen from glusterd_svc_attach_cbk */ + ret = glusterd_attach_svc(svc, volinfo, flags); + if (ret) { +- glusterd_volinfo_unref(volinfo); + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, + "Failed to attach shd svc(volume=%s) to pid=%d. Starting" + "a new process", +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index f7be394..02945b1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -695,10 +695,6 @@ out: + if (flag) { + GF_FREE(flag); + } +- +- if (volinfo) +- glusterd_volinfo_unref(volinfo); +- + GF_ATOMIC_DEC(conf->blockers); + STACK_DESTROY(frame->root); + return 0; +-- +1.8.3.1 + diff --git a/SOURCES/0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch b/SOURCES/0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch new file mode 100644 index 0000000..dc17d72 --- /dev/null +++ b/SOURCES/0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch @@ -0,0 +1,144 @@ +From 33d59c74169192b4ba89abc915d8d785bc450fbb Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:49:54 +0530 +Subject: [PATCH 247/255] Revert "afr/shd: Cleanup self heal daemon resources + during afr fini" + +This reverts commit faaaa3452ceec6afcc18cffc9beca3fe19841cce. 
+ +BUG: 1471742 +Change-Id: Id4a22ab45b89872684830f866ec4b589fca50a90 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175956 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/syncop-utils.c | 8 ----- + xlators/cluster/afr/src/afr-self-heald.c | 2 -- + xlators/cluster/afr/src/afr.c | 57 -------------------------------- + 3 files changed, 67 deletions(-) + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index b842142..be03527 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -350,11 +350,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + gf_boolean_t cond_init = _gf_false; + gf_boolean_t mut_init = _gf_false; + gf_dirent_t entries; +- xlator_t *this = NULL; +- +- if (frame) { +- this = frame->this; +- } + + /*For this functionality to be implemented in general, we need + * synccond_t infra which doesn't block the executing thread. Until then +@@ -402,9 +397,6 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid, + + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { +- if (this && this->cleanup_starting) +- goto out; +- + list_del_init(&entry->list); + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) { + gf_dirent_entry_free(entry); +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 8bc4720..7eb1207 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -373,7 +373,6 @@ afr_shd_sweep_prepare(struct subvol_healer *healer) + + time(&event->start_time); + event->end_time = 0; +- _mask_cancellation(); + } + + void +@@ -395,7 +394,6 @@ afr_shd_sweep_done(struct subvol_healer *healer) + + if (eh_save_history(shd->statistics[healer->subvol], history) < 0) + GF_FREE(history); +- _unmask_cancellation(); + } + + int +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index a0a7551..33258a0 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -611,70 +611,13 @@ init(xlator_t *this) + out: + return ret; + } +-void +-afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer) +-{ +- int ret = -1; +- +- if (!healer) +- return; +- +- if (healer->running) { +- /* +- * If there are any resources to cleanup, We need +- * to do that gracefully using pthread_cleanup_push +- */ +- ret = gf_thread_cleanup_xint(healer->thread); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED, +- "Failed to clean up healer threads."); +- healer->thread = 0; +- } +- pthread_cond_destroy(&healer->cond); +- pthread_mutex_destroy(&healer->mutex); +-} +- +-void +-afr_selfheal_daemon_fini(xlator_t *this) +-{ +- struct subvol_healer *healer = NULL; +- afr_self_heald_t *shd = NULL; +- afr_private_t *priv = NULL; +- int i = 0; +- +- priv = this->private; +- if (!priv) +- return; +- +- shd = &priv->shd; +- if (!shd->iamshd) +- return; +- +- for (i = 0; i < priv->child_count; i++) { +- healer = &shd->index_healers[i]; +- afr_destroy_healer_object(this, healer); + +- healer = &shd->full_healers[i]; +- afr_destroy_healer_object(this, healer); +- +- if (shd->statistics[i]) +- eh_destroy(shd->statistics[i]); +- } +- GF_FREE(shd->index_healers); +- GF_FREE(shd->full_healers); +- GF_FREE(shd->statistics); +- if (shd->split_brain) +- 
eh_destroy(shd->split_brain); +-} + void + fini(xlator_t *this) + { + afr_private_t *priv = NULL; + + priv = this->private; +- +- afr_selfheal_daemon_fini(this); +- + LOCK(&priv->lock); + if (priv->timer != NULL) { + gf_timer_call_cancel(this->ctx, priv->timer); +-- +1.8.3.1 + diff --git a/SOURCES/0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch b/SOURCES/0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch new file mode 100644 index 0000000..d721a82 --- /dev/null +++ b/SOURCES/0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch @@ -0,0 +1,151 @@ +From 469cb9e16d46f075caf609ddcb12a7c02d73ce8b Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:50:24 +0530 +Subject: [PATCH 248/255] Revert "shd/mux: Fix coverity issues introduced by + shd mux patch" + +This reverts commit 0021a4bbc9af2bfe28d4a79f76c3cd33f23dd118. + +BUG: 1471742 +Change-Id: I0749328412ed3cc7ae5d64baea7a90b63b489a08 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175957 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/graph.c | 21 ++++++++------------- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 6 ------ + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 24 +++++++----------------- + 3 files changed, 15 insertions(+), 36 deletions(-) + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index 4c8b02d..a492dd8 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1470,9 +1470,7 @@ glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) + goto out; + parent_graph = ctx->active; + graph = volfile_obj->graph; +- if (!graph) +- goto out; +- if (graph->first) ++ if (graph && graph->first) + xl = graph->first; + + last_xl = graph->last_xl; +@@ -1593,10 +1591,12 @@ glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, + parent_graph->leaf_count += graph->leaf_count; + parent_graph->id++; + +- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); + if (!volfile_obj) { +- ret = -1; +- goto out; ++ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); ++ if (!volfile_obj) { ++ ret = -1; ++ goto out; ++ } + } + + graph->used = 1; +@@ -1641,7 +1641,6 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + { + glusterfs_graph_t *oldvolfile_graph = NULL; + glusterfs_graph_t *newvolfile_graph = NULL; +- char vol_id[NAME_MAX + 1]; + + int ret = -1; + +@@ -1673,9 +1672,6 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); + + if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { +- ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id); +- if (ret < 0) +- goto out; + ret = glusterfs_process_svc_detach(ctx, volfile_obj); + if (ret) { + gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, +@@ -1684,9 +1680,8 @@ glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, + "old graph. 
Aborting the reconfiguration operation"); + goto out; + } +- volfile_obj = NULL; +- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id, +- checksum); ++ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, ++ volfile_obj->vol_id, checksum); + goto out; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 19eca9f..4789843 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -101,8 +101,6 @@ glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, + svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc); + ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s", + mux_conn->sockpath); +- if (ret < 0) +- goto out; + } else { + ret = mkdir_p(logdir, 0755, _gf_true); + if ((ret == -1) && (EEXIST != errno)) { +@@ -675,10 +673,6 @@ glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) + glusterd_volinfo_ref(volinfo); + svc_proc->data = volinfo; + ret = glusterd_svc_stop(svc, sig); +- if (ret) { +- glusterd_volinfo_unref(volinfo); +- goto out; +- } + } + if (!empty && pid != -1) { + ret = glusterd_detach_svc(svc, volinfo, sig); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index 02945b1..e42703c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -411,14 +411,9 @@ __gf_find_compatible_svc(gd_node_type daemon) + conf = THIS->private; + GF_VALIDATE_OR_GOTO("glusterd", conf, out); + +- switch (daemon) { +- case GD_NODE_SHD: { +- svc_procs = &conf->shd_procs; +- if (!svc_procs) +- goto out; +- } break; +- default: +- /* Add support for other client daemons here */ ++ if (daemon == GD_NODE_SHD) { ++ svc_procs = &conf->shd_procs; ++ if (!svc_procs) + goto out; + } + +@@ -545,16 +540,11 @@ __gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid) + if (!conf) + return NULL; + +- switch (daemon) { +- case GD_NODE_SHD: { +- svc_procs = &conf->shd_procs; +- if (!svc_procs) +- return NULL; +- } break; +- default: +- /* Add support for other client daemons here */ ++ if (daemon == GD_NODE_SHD) { ++ svc_procs = &conf->shd_procs; ++ if (!svc_proc) + return NULL; +- } ++ } /* Can be moved to switch when mux is implemented for other daemon; */ + + cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) + { +-- +1.8.3.1 + diff --git a/SOURCES/0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch b/SOURCES/0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch new file mode 100644 index 0000000..5547257 --- /dev/null +++ b/SOURCES/0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch @@ -0,0 +1,95 @@ +From 1864a4f382f3031915e8126440a1561035487e49 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:53:20 +0530 +Subject: [PATCH 249/255] Revert "client/fini: return fini after rpc cleanup" + +This reverts commit d79cb2cdff6fe8d962c9ac095a7541ddf500302b. 
+ +BUG: 1471742 +Change-Id: I15e6544d47fb7b6002c3b44de3fe0b2a13c84f51 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175958 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/protocol/client/src/client.c | 25 +++++-------------------- + xlators/protocol/client/src/client.h | 6 ------ + 2 files changed, 5 insertions(+), 26 deletions(-) + +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 95e4be5..532ef35 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -49,12 +49,11 @@ client_fini_complete(xlator_t *this) + if (!conf->destroy) + return 0; + +- pthread_mutex_lock(&conf->lock); +- { +- conf->fini_completed = _gf_true; +- pthread_cond_broadcast(&conf->fini_complete_cond); +- } +- pthread_mutex_unlock(&conf->lock); ++ this->private = NULL; ++ ++ pthread_spin_destroy(&conf->fd_lock); ++ pthread_mutex_destroy(&conf->lock); ++ GF_FREE(conf); + + out: + return 0; +@@ -2730,7 +2729,6 @@ init(xlator_t *this) + goto out; + + pthread_mutex_init(&conf->lock, NULL); +- pthread_cond_init(&conf->fini_complete_cond, NULL); + pthread_spin_init(&conf->fd_lock, 0); + INIT_LIST_HEAD(&conf->saved_fds); + +@@ -2789,7 +2787,6 @@ fini(xlator_t *this) + if (!conf) + return; + +- conf->fini_completed = _gf_false; + conf->destroy = 1; + if (conf->rpc) { + /* cleanup the saved-frames before last unref */ +@@ -2797,18 +2794,6 @@ fini(xlator_t *this) + rpc_clnt_unref(conf->rpc); + } + +- pthread_mutex_lock(&conf->lock); +- { +- while (!conf->fini_completed) +- pthread_cond_wait(&conf->fini_complete_cond, &conf->lock); +- } +- pthread_mutex_unlock(&conf->lock); +- +- pthread_spin_destroy(&conf->fd_lock); +- pthread_mutex_destroy(&conf->lock); +- pthread_cond_destroy(&conf->fini_complete_cond); +- GF_FREE(conf); +- + /* Saved Fds */ + /* TODO: */ + +diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h +index 8dcd72f..f12fa61 100644 +--- a/xlators/protocol/client/src/client.h ++++ b/xlators/protocol/client/src/client.h +@@ -235,12 +235,6 @@ typedef struct clnt_conf { + * up, disconnects can be + * logged + */ +- +- gf_boolean_t old_protocol; /* used only for old-protocol testing */ +- pthread_cond_t fini_complete_cond; /* Used to wait till we finsh the fini +- compltely, ie client_fini_complete +- to return*/ +- gf_boolean_t fini_completed; + } clnt_conf_t; + + typedef struct _client_fd_ctx { +-- +1.8.3.1 + diff --git a/SOURCES/0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch b/SOURCES/0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch new file mode 100644 index 0000000..637a16a --- /dev/null +++ b/SOURCES/0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch @@ -0,0 +1,4572 @@ +From ec629963d61c3ec084c95366eec5ee3a976b1213 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 11 Jul 2019 12:57:45 +0530 +Subject: [PATCH 250/255] Revert "mgmt/shd: Implement multiplexing in self heal + daemon" + +This reverts commit 2cede2b87fb3e3e0673be9cf67e7d6eec3f7879c. 
+ +BUG: 1471742 +Change-Id: I3830d9189dfdb567a44935aa97dc963f4594dfdb +fixes: bz#1471742 +Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175959 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/glusterfsd-messages.h | 2 +- + glusterfsd/src/glusterfsd-mgmt.c | 238 +------ + glusterfsd/src/glusterfsd.c | 20 +- + libglusterfs/src/defaults-tmpl.c | 19 +- + libglusterfs/src/glusterfs/glusterfs.h | 7 - + libglusterfs/src/glusterfs/libglusterfs-messages.h | 4 +- + libglusterfs/src/glusterfs/xlator.h | 3 - + libglusterfs/src/graph.c | 451 ------------- + libglusterfs/src/graph.y | 3 - + libglusterfs/src/libglusterfs.sym | 5 - + libglusterfs/src/statedump.c | 3 +- + libglusterfs/src/xlator.c | 16 - + rpc/rpc-lib/src/protocol-common.h | 2 - + tests/basic/glusterd/heald.t | 49 +- + .../reset-brick-and-daemons-follow-quorum.t | 8 +- + tests/volume.rc | 6 +- + xlators/mgmt/glusterd/src/Makefile.am | 6 +- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c | 42 -- + xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h | 4 +- + xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-handler.c | 11 +- + xlators/mgmt/glusterd/src/glusterd-handshake.c | 21 - + xlators/mgmt/glusterd/src/glusterd-mem-types.h | 1 - + xlators/mgmt/glusterd/src/glusterd-messages.h | 4 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 84 +-- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.c | 140 ---- + .../mgmt/glusterd/src/glusterd-shd-svc-helper.h | 45 -- + xlators/mgmt/glusterd/src/glusterd-shd-svc.c | 540 ++-------------- + xlators/mgmt/glusterd/src/glusterd-shd-svc.h | 17 +- + xlators/mgmt/glusterd/src/glusterd-sm.c | 12 +- + xlators/mgmt/glusterd/src/glusterd-snapd-svc.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-statedump.c | 3 + + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 715 +-------------------- + xlators/mgmt/glusterd/src/glusterd-svc-helper.h | 40 +- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c | 246 ++----- + xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h | 27 - + xlators/mgmt/glusterd/src/glusterd-tier.c | 3 +- + xlators/mgmt/glusterd/src/glusterd-tierd-svc.c | 4 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 137 +--- + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 - + xlators/mgmt/glusterd/src/glusterd-volgen.c | 60 +- + xlators/mgmt/glusterd/src/glusterd-volgen.h | 11 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 8 +- + xlators/mgmt/glusterd/src/glusterd.c | 12 +- + xlators/mgmt/glusterd/src/glusterd.h | 30 +- + xlators/protocol/client/src/client.c | 31 +- + 47 files changed, 292 insertions(+), 2810 deletions(-) + delete mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c + delete mode 100644 xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h + +diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h +index 280624c..ce6c8ca 100644 +--- a/glusterfsd/src/glusterfsd-messages.h ++++ b/glusterfsd/src/glusterfsd-messages.h +@@ -36,6 +36,6 @@ GLFS_MSGID(GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3, + glusterfsd_msg_31, glusterfsd_msg_32, glusterfsd_msg_33, + glusterfsd_msg_34, glusterfsd_msg_35, glusterfsd_msg_36, + glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39, +- glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43); ++ glusterfsd_msg_40); + + #endif /* 
!_GLUSTERFSD_MESSAGES_H_ */ +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 1d2cd1a..15acc10 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -48,20 +48,7 @@ int + glusterfs_graph_unknown_options(glusterfs_graph_t *graph); + int + emancipate(glusterfs_ctx_t *ctx, int ret); +-int +-glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum); +-int +-glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, +- gf_volfile_t *volfile_obj, char *checksum); +-int +-glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum); +-int +-glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj); + +-gf_boolean_t +-mgmt_is_multiplexed_daemon(char *name); + int + mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + { +@@ -75,96 +62,6 @@ mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data) + } + + int +-mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id) +-{ +- glusterfs_ctx_t *ctx = NULL; +- int ret = 0; +- FILE *tmpfp = NULL; +- gf_volfile_t *volfile_obj = NULL; +- gf_volfile_t *volfile_tmp = NULL; +- char sha256_hash[SHA256_DIGEST_LENGTH] = { +- 0, +- }; +- int tmp_fd = -1; +- char template[] = "/tmp/glfs.volfile.XXXXXX"; +- +- glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash); +- ctx = THIS->ctx; +- LOCK(&ctx->volfile_lock); +- { +- list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) +- { +- if (!strcmp(volfile_id, volfile_obj->vol_id)) { +- if (!memcmp(sha256_hash, volfile_obj->volfile_checksum, +- sizeof(volfile_obj->volfile_checksum))) { +- UNLOCK(&ctx->volfile_lock); +- gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40, +- "No change in volfile, continuing"); +- goto out; +- } +- volfile_tmp = volfile_obj; +- break; +- } +- } +- +- /* coverity[secure_temp] mkstemp uses 0600 as the mode */ +- tmp_fd = mkstemp(template); +- if (-1 == tmp_fd) { +- UNLOCK(&ctx->volfile_lock); +- gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39, +- "Unable to create temporary file: %s", template); +- ret = -1; +- goto out; +- } +- +- /* Calling unlink so that when the file is closed or program +- * terminates the temporary file is deleted. 
+- */ +- ret = sys_unlink(template); +- if (ret < 0) { +- gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39, +- "Unable to delete temporary file: %s", template); +- ret = 0; +- } +- +- tmpfp = fdopen(tmp_fd, "w+b"); +- if (!tmpfp) { +- ret = -1; +- goto unlock; +- } +- +- fwrite(volfile, size, 1, tmpfp); +- fflush(tmpfp); +- if (ferror(tmpfp)) { +- ret = -1; +- goto unlock; +- } +- +- if (!volfile_tmp) { +- /* There is no checksum in the list, which means simple attach +- * the volfile +- */ +- ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id, +- sha256_hash); +- goto unlock; +- } +- ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj, +- sha256_hash); +- if (ret < 0) { +- gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!"); +- } +- } +-unlock: +- UNLOCK(&ctx->volfile_lock); +-out: +- if (tmpfp) +- fclose(tmpfp); +- else if (tmp_fd != -1) +- sys_close(tmp_fd); +- return ret; +-} +- +-int + mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data) + { + return 0; +@@ -1069,110 +966,6 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + } + + int +-glusterfs_handle_svc_attach(rpcsvc_request_t *req) +-{ +- int32_t ret = -1; +- gd1_mgmt_brick_op_req xlator_req = { +- 0, +- }; +- xlator_t *this = NULL; +- glusterfs_ctx_t *ctx = NULL; +- +- GF_ASSERT(req); +- this = THIS; +- GF_ASSERT(this); +- +- ctx = this->ctx; +- ret = xdr_to_generic(req->msg[0], &xlator_req, +- (xdrproc_t)xdr_gd1_mgmt_brick_op_req); +- +- if (ret < 0) { +- /*failed to decode msg;*/ +- req->rpc_err = GARBAGE_ARGS; +- goto out; +- } +- gf_msg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, +- "received attach " +- "request for volfile-id=%s", +- xlator_req.name); +- ret = 0; +- +- if (ctx->active) { +- ret = mgmt_process_volfile(xlator_req.input.input_val, +- xlator_req.input.input_len, xlator_req.name); +- } else { +- gf_msg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, +- "got attach for %s but no active graph", xlator_req.name); +- } +-out: +- if (xlator_req.input.input_val) +- free(xlator_req.input.input_val); +- if (xlator_req.name) +- free(xlator_req.name); +- glusterfs_translator_info_response_send(req, ret, NULL, NULL); +- return 0; +-} +- +-int +-glusterfs_handle_svc_detach(rpcsvc_request_t *req) +-{ +- gd1_mgmt_brick_op_req xlator_req = { +- 0, +- }; +- ssize_t ret; +- glusterfs_ctx_t *ctx = NULL; +- gf_volfile_t *volfile_obj = NULL; +- gf_volfile_t *volfile_tmp = NULL; +- +- ret = xdr_to_generic(req->msg[0], &xlator_req, +- (xdrproc_t)xdr_gd1_mgmt_brick_op_req); +- if (ret < 0) { +- req->rpc_err = GARBAGE_ARGS; +- return -1; +- } +- ctx = glusterfsd_ctx; +- +- LOCK(&ctx->volfile_lock); +- { +- list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) +- { +- if (!strcmp(xlator_req.name, volfile_obj->vol_id)) { +- volfile_tmp = volfile_obj; +- break; +- } +- } +- +- if (!volfile_tmp) { +- UNLOCK(&ctx->volfile_lock); +- gf_msg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_41, +- "can't detach %s - not found", xlator_req.name); +- /* +- * Used to be -ENOENT. However, the caller asked us to +- * make sure it's down and if it's already down that's +- * good enough. +- */ +- ret = 0; +- goto out; +- } +- ret = glusterfs_process_svc_detach(ctx, volfile_tmp); +- if (ret) { +- UNLOCK(&ctx->volfile_lock); +- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_41, +- "Could not detach " +- "old graph. 
Aborting the reconfiguration operation"); +- goto out; +- } +- } +- UNLOCK(&ctx->volfile_lock); +-out: +- glusterfs_terminate_response_send(req, ret); +- free(xlator_req.name); +- xlator_req.name = NULL; +- +- return 0; +-} +- +-int + glusterfs_handle_dump_metrics(rpcsvc_request_t *req) + { + int32_t ret = -1; +@@ -2056,13 +1849,6 @@ rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = { + + [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", GLUSTERD_DUMP_METRICS, + glusterfs_handle_dump_metrics, NULL, 0, DRC_NA}, +- +- [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", GLUSTERD_SVC_ATTACH, +- glusterfs_handle_svc_attach, NULL, 0, DRC_NA}, +- +- [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", GLUSTERD_SVC_DETACH, +- glusterfs_handle_svc_detach, NULL, 0, DRC_NA}, +- + }; + + struct rpcsvc_program glusterfs_mop_prog = { +@@ -2210,17 +1996,14 @@ mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count, + } + + volfile: ++ ret = 0; + size = rsp.op_ret; +- volfile_id = frame->local; +- if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { +- ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id); +- goto post_graph_mgmt; +- } + +- ret = 0; + glusterfs_compute_sha256((const unsigned char *)rsp.spec, size, + sha256_hash); + ++ volfile_id = frame->local; ++ + LOCK(&ctx->volfile_lock); + { + locked = 1; +@@ -2322,7 +2105,6 @@ volfile: + } + + INIT_LIST_HEAD(&volfile_tmp->volfile_list); +- volfile_tmp->graph = ctx->active; + list_add(&volfile_tmp->volfile_list, &ctx->volfile_list); + snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s", + volfile_id); +@@ -2334,7 +2116,6 @@ volfile: + + locked = 0; + +-post_graph_mgmt: + if (!is_mgmt_rpc_reconnect) { + need_emancipate = 1; + glusterfs_mgmt_pmap_signin(ctx); +@@ -2488,21 +2269,10 @@ glusterfs_volfile_fetch(glusterfs_ctx_t *ctx) + { + xlator_t *server_xl = NULL; + xlator_list_t *trav; +- gf_volfile_t *volfile_obj = NULL; +- int ret = 0; ++ int ret; + + LOCK(&ctx->volfile_lock); + { +- if (ctx->active && +- mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) { +- list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list) +- { +- ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id); +- } +- UNLOCK(&ctx->volfile_lock); +- return ret; +- } +- + if (ctx->active) { + server_xl = ctx->active->first; + if (strcmp(server_xl->type, "protocol/server") != 0) { +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 2172af4..5b5e996 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -2593,6 +2593,24 @@ out: + #endif + + int ++glusterfs_graph_fini(glusterfs_graph_t *graph) ++{ ++ xlator_t *trav = NULL; ++ ++ trav = graph->first; ++ ++ while (trav) { ++ if (trav->init_succeeded) { ++ trav->fini(trav); ++ trav->init_succeeded = 0; ++ } ++ trav = trav->next; ++ } ++ ++ return 0; ++} ++ ++int + glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp) + { + glusterfs_graph_t *graph = NULL; +@@ -2791,7 +2809,7 @@ main(int argc, char *argv[]) + + /* set brick_mux mode only for server process */ + if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) { +- gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43, ++ gf_msg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_40, + "command line argument --brick-mux is valid only for brick " + "process"); + goto out; +diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c +index 82e7f78..5bf64e8 100644 +--- a/libglusterfs/src/defaults-tmpl.c ++++ b/libglusterfs/src/defaults-tmpl.c +@@ -127,12 
+127,6 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + GF_UNUSED int ret = 0; + xlator_t *victim = data; + +- glusterfs_graph_t *graph = NULL; +- +- GF_VALIDATE_OR_GOTO("notify", this, out); +- graph = this->graph; +- GF_VALIDATE_OR_GOTO(this->name, graph, out); +- + switch (event) { + case GF_EVENT_PARENT_UP: + case GF_EVENT_PARENT_DOWN: { +@@ -165,17 +159,6 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + xlator_notify(parent->xlator, event, this, NULL); + parent = parent->next; + } +- +- if (event == GF_EVENT_CHILD_DOWN && +- !(this->ctx && this->ctx->master) && (graph->top == this)) { +- /* Make sure this is not a daemon with master xlator */ +- pthread_mutex_lock(&graph->mutex); +- { +- graph->used = 0; +- pthread_cond_broadcast(&graph->child_down_cond); +- } +- pthread_mutex_unlock(&graph->mutex); +- } + } break; + case GF_EVENT_UPCALL: { + xlator_list_t *parent = this->parents; +@@ -222,7 +205,7 @@ default_notify(xlator_t *this, int32_t event, void *data, ...) + * nothing to do with readability. + */ + } +-out: ++ + return 0; + } + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 9ec2365..2cedf1a 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -597,10 +597,6 @@ struct _glusterfs_graph { + int used; /* Should be set when fuse gets + first CHILD_UP */ + uint32_t volfile_checksum; +- void *last_xl; /* Stores the last xl of the graph, as of now only populated +- in client multiplexed code path */ +- pthread_mutex_t mutex; +- pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */ + }; + typedef struct _glusterfs_graph glusterfs_graph_t; + +@@ -743,7 +739,6 @@ typedef struct { + char volfile_checksum[SHA256_DIGEST_LENGTH]; + char vol_id[NAME_MAX + 1]; + struct list_head volfile_list; +- glusterfs_graph_t *graph; + + } gf_volfile_t; + +@@ -827,6 +822,4 @@ gf_free_mig_locks(lock_migration_info_t *locks); + + int + glusterfs_read_secure_access_file(void); +-int +-glusterfs_graph_fini(glusterfs_graph_t *graph); + #endif /* _GLUSTERFS_H */ +diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h +index ea2aa60..1b72f6d 100644 +--- a/libglusterfs/src/glusterfs/libglusterfs-messages.h ++++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h +@@ -109,8 +109,6 @@ GLFS_MSGID( + LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST, + LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED, + LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG, +- LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE, +- LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED, +- LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED); ++ LG_MSG_XXH64_TO_GFID_FAILED); + + #endif /* !_LG_MESSAGES_H_ */ +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 8998976..b78daad 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1089,7 +1089,4 @@ handle_default_options(xlator_t *xl, dict_t *options); + + void + gluster_graph_take_reference(xlator_t *tree); +- +-gf_boolean_t +-mgmt_is_multiplexed_daemon(char *name); + #endif /* _XLATOR_H */ +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index a492dd8..bb5e67a 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -114,53 +114,6 @@ out: + return cert_depth; + } + +-xlator_t * 
+-glusterfs_get_last_xlator(glusterfs_graph_t *graph) +-{ +- xlator_t *trav = graph->first; +- if (!trav) +- return NULL; +- +- while (trav->next) +- trav = trav->next; +- +- return trav; +-} +- +-xlator_t * +-glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl) +-{ +- xlator_list_t *unlink = NULL; +- xlator_list_t *prev = NULL; +- xlator_list_t **tmp = NULL; +- xlator_t *next_child = NULL; +- xlator_t *xl = NULL; +- +- for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) { +- if ((*tmp)->xlator == cxl) { +- unlink = *tmp; +- *tmp = (*tmp)->next; +- if (*tmp) +- next_child = (*tmp)->xlator; +- break; +- } +- prev = *tmp; +- } +- +- if (!prev) +- xl = pxl; +- else if (prev->xlator) +- xl = prev->xlator->graph->last_xl; +- +- if (xl) +- xl->next = next_child; +- if (next_child) +- next_child->prev = xl; +- +- GF_FREE(unlink); +- return next_child; +-} +- + int + glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl) + { +@@ -1139,8 +1092,6 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph) + ret = xlator_tree_free_memacct(graph->first); + + list_del_init(&graph->list); +- pthread_mutex_destroy(&graph->mutex); +- pthread_cond_destroy(&graph->child_down_cond); + GF_FREE(graph); + + return ret; +@@ -1183,25 +1134,6 @@ out: + } + + int +-glusterfs_graph_fini(glusterfs_graph_t *graph) +-{ +- xlator_t *trav = NULL; +- +- trav = graph->first; +- +- while (trav) { +- if (trav->init_succeeded) { +- trav->cleanup_starting = 1; +- trav->fini(trav); +- trav->init_succeeded = 0; +- } +- trav = trav->next; +- } +- +- return 0; +-} +- +-int + glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, + glusterfs_graph_t **newgraph) + { +@@ -1324,386 +1256,3 @@ glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path, + + return 0; + } +-int +-glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx, +- glusterfs_graph_t *parent_graph) +-{ +- if (parent_graph) { +- if (parent_graph->first) { +- xlator_destroy(parent_graph->first); +- } +- ctx->active = NULL; +- GF_FREE(parent_graph); +- parent_graph = NULL; +- } +- return 0; +-} +- +-void * +-glusterfs_graph_cleanup(void *arg) +-{ +- glusterfs_graph_t *graph = NULL; +- glusterfs_ctx_t *ctx = THIS->ctx; +- int ret = -1; +- graph = arg; +- +- if (!graph) +- return NULL; +- +- /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN +- * Then wait for GF_EVENT_CHILD_DOWN to get on the top +- * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed +- * to fini. +- * +- * During fini call, this will take a last unref on rpc and +- * rpc_transport_object. +- */ +- if (graph->first) +- default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first); +- +- ret = pthread_mutex_lock(&graph->mutex); +- if (ret != 0) { +- gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, +- "Failed to aquire a lock"); +- goto out; +- } +- /* check and wait for CHILD_DOWN for top xlator*/ +- while (graph->used) { +- ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex); +- if (ret != 0) +- gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED, +- "cond wait failed "); +- } +- +- ret = pthread_mutex_unlock(&graph->mutex); +- if (ret != 0) { +- gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED, +- "Failed to release a lock"); +- } +- +- /* Though we got a child down on top xlator, we have to wait until +- * all the notifier to exit. Because there should not be any threads +- * that access xl variables. 
+- */ +- pthread_mutex_lock(&ctx->notify_lock); +- { +- while (ctx->notifying) +- pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock); +- } +- pthread_mutex_unlock(&ctx->notify_lock); +- +- glusterfs_graph_fini(graph); +- glusterfs_graph_destroy(graph); +-out: +- return NULL; +-} +- +-glusterfs_graph_t * +-glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name, +- char *type) +-{ +- glusterfs_graph_t *parent_graph = NULL; +- xlator_t *ixl = NULL; +- int ret = -1; +- parent_graph = GF_CALLOC(1, sizeof(*parent_graph), +- gf_common_mt_glusterfs_graph_t); +- if (!parent_graph) +- goto out; +- +- INIT_LIST_HEAD(&parent_graph->list); +- +- ctx->active = parent_graph; +- ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t); +- if (!ixl) +- goto out; +- +- ixl->ctx = ctx; +- ixl->graph = parent_graph; +- ixl->options = dict_new(); +- if (!ixl->options) +- goto out; +- +- ixl->name = gf_strdup(name); +- if (!ixl->name) +- goto out; +- +- ixl->is_autoloaded = 1; +- +- if (xlator_set_type(ixl, type) == -1) { +- gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED, +- "%s (%s) set type failed", name, type); +- goto out; +- } +- +- glusterfs_graph_set_first(parent_graph, ixl); +- parent_graph->top = ixl; +- ixl = NULL; +- +- gettimeofday(&parent_graph->dob, NULL); +- fill_uuid(parent_graph->graph_uuid, 128); +- parent_graph->id = ctx->graph_id++; +- ret = 0; +-out: +- if (ixl) +- xlator_destroy(ixl); +- +- if (ret) { +- glusterfs_muxsvc_cleanup_parent(ctx, parent_graph); +- parent_graph = NULL; +- } +- return parent_graph; +-} +- +-int +-glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj) +-{ +- xlator_t *last_xl = NULL; +- glusterfs_graph_t *graph = NULL; +- glusterfs_graph_t *parent_graph = NULL; +- pthread_t clean_graph = { +- 0, +- }; +- int ret = -1; +- xlator_t *xl = NULL; +- +- if (!ctx || !ctx->active || !volfile_obj) +- goto out; +- parent_graph = ctx->active; +- graph = volfile_obj->graph; +- if (graph && graph->first) +- xl = graph->first; +- +- last_xl = graph->last_xl; +- if (last_xl) +- last_xl->next = NULL; +- if (!xl || xl->cleanup_starting) +- goto out; +- +- xl->cleanup_starting = 1; +- gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED, +- "detaching child %s", volfile_obj->vol_id); +- +- list_del_init(&volfile_obj->volfile_list); +- glusterfs_mux_xlator_unlink(parent_graph->top, xl); +- parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph); +- parent_graph->xl_count -= graph->xl_count; +- parent_graph->leaf_count -= graph->leaf_count; +- default_notify(xl, GF_EVENT_PARENT_DOWN, xl); +- parent_graph->id++; +- ret = 0; +-out: +- if (!ret) { +- list_del_init(&volfile_obj->volfile_list); +- if (graph) { +- ret = gf_thread_create_detached( +- &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean"); +- if (ret) { +- gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, +- LG_MSG_GRAPH_CLEANUP_FAILED, +- "%s failed to create clean " +- "up thread", +- volfile_obj->vol_id); +- ret = 0; +- } +- } +- GF_FREE(volfile_obj); +- } +- return ret; +-} +- +-int +-glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp, +- char *volfile_id, char *checksum) +-{ +- glusterfs_graph_t *graph = NULL; +- glusterfs_graph_t *parent_graph = NULL; +- glusterfs_graph_t *clean_graph = NULL; +- int ret = -1; +- xlator_t *xl = NULL; +- xlator_t *last_xl = NULL; +- gf_volfile_t *volfile_obj = NULL; +- pthread_t thread_id = { +- 0, +- }; +- +- if (!ctx) +- goto out; +- parent_graph = ctx->active; +- graph = 
glusterfs_graph_construct(fp); +- if (!graph) { +- gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, +- "failed to construct the graph"); +- goto out; +- } +- graph->last_xl = glusterfs_get_last_xlator(graph); +- +- for (xl = graph->first; xl; xl = xl->next) { +- if (strcmp(xl->type, "mount/fuse") == 0) { +- gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, +- LG_MSG_GRAPH_ATTACH_FAILED, +- "fuse xlator cannot be specified in volume file"); +- goto out; +- } +- } +- +- graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph)); +- xl = graph->first; +- /* TODO memory leaks everywhere need to free graph in case of error */ +- if (glusterfs_graph_prepare(graph, ctx, xl->name)) { +- gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, +- "failed to prepare graph for xlator %s", xl->name); +- ret = -1; +- goto out; +- } else if (glusterfs_graph_init(graph)) { +- gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, +- "failed to initialize graph for xlator %s", xl->name); +- ret = -1; +- goto out; +- } else if (glusterfs_graph_parent_up(graph)) { +- gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED, +- "failed to link the graphs for xlator %s ", xl->name); +- ret = -1; +- goto out; +- } +- +- if (!parent_graph) { +- parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd", +- "debug/io-stats"); +- if (!parent_graph) +- goto out; +- ((xlator_t *)parent_graph->top)->next = xl; +- clean_graph = parent_graph; +- } else { +- last_xl = parent_graph->last_xl; +- if (last_xl) +- last_xl->next = xl; +- xl->prev = last_xl; +- } +- parent_graph->last_xl = graph->last_xl; +- +- ret = glusterfs_xlator_link(parent_graph->top, xl); +- if (ret) { +- gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED, +- "parent up notification failed"); +- goto out; +- } +- parent_graph->xl_count += graph->xl_count; +- parent_graph->leaf_count += graph->leaf_count; +- parent_graph->id++; +- +- if (!volfile_obj) { +- volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t); +- if (!volfile_obj) { +- ret = -1; +- goto out; +- } +- } +- +- graph->used = 1; +- parent_graph->id++; +- list_add(&graph->list, &ctx->graphs); +- INIT_LIST_HEAD(&volfile_obj->volfile_list); +- volfile_obj->graph = graph; +- snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s", +- volfile_id); +- memcpy(volfile_obj->volfile_checksum, checksum, +- sizeof(volfile_obj->volfile_checksum)); +- list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list); +- +- gf_log_dump_graph(fp, graph); +- graph = NULL; +- +- ret = 0; +-out: +- if (ret) { +- if (graph) { +- gluster_graph_take_reference(graph->first); +- ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup, +- graph, "graph_clean"); +- if (ret) { +- gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, +- LG_MSG_GRAPH_CLEANUP_FAILED, +- "%s failed to create clean " +- "up thread", +- volfile_id); +- ret = 0; +- } +- } +- if (clean_graph) +- glusterfs_muxsvc_cleanup_parent(ctx, clean_graph); +- } +- return ret; +-} +- +-int +-glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx, +- gf_volfile_t *volfile_obj, char *checksum) +-{ +- glusterfs_graph_t *oldvolfile_graph = NULL; +- glusterfs_graph_t *newvolfile_graph = NULL; +- +- int ret = -1; +- +- if (!ctx) { +- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL, +- "ctx is NULL"); +- goto out; +- } +- +- /* Change the message id */ +- if (!volfile_obj) { +- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, 
LG_MSG_CTX_NULL, +- "failed to get volfile object"); +- goto out; +- } +- +- oldvolfile_graph = volfile_obj->graph; +- if (!oldvolfile_graph) { +- goto out; +- } +- +- newvolfile_graph = glusterfs_graph_construct(newvolfile_fp); +- +- if (!newvolfile_graph) { +- goto out; +- } +- newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph); +- +- glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first); +- +- if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) { +- ret = glusterfs_process_svc_detach(ctx, volfile_obj); +- if (ret) { +- gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, +- LG_MSG_GRAPH_CLEANUP_FAILED, +- "Could not detach " +- "old graph. Aborting the reconfiguration operation"); +- goto out; +- } +- ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, +- volfile_obj->vol_id, checksum); +- goto out; +- } +- +- gf_msg_debug("glusterfsd-mgmt", 0, +- "Only options have changed in the" +- " new graph"); +- +- ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph); +- if (ret) { +- gf_msg_debug("glusterfsd-mgmt", 0, +- "Could not reconfigure " +- "new options in old graph"); +- goto out; +- } +- memcpy(volfile_obj->volfile_checksum, checksum, +- sizeof(volfile_obj->volfile_checksum)); +- +- ret = 0; +-out: +- +- if (newvolfile_graph) +- glusterfs_graph_destroy(newvolfile_graph); +- +- return ret; +-} +diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y +index c60ff38..5b92985 100644 +--- a/libglusterfs/src/graph.y ++++ b/libglusterfs/src/graph.y +@@ -542,9 +542,6 @@ glusterfs_graph_new () + + INIT_LIST_HEAD (&graph->list); + +- pthread_mutex_init(&graph->mutex, NULL); +- pthread_cond_init(&graph->child_down_cond, NULL); +- + gettimeofday (&graph->dob, NULL); + + return graph; +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 05f93b4..4dca7de 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1155,8 +1155,3 @@ gf_changelog_register_generic + gf_gfid_generate_from_xxh64 + find_xlator_option_in_cmd_args_t + gf_d_type_from_ia_type +-glusterfs_graph_fini +-glusterfs_process_svc_attach_volfp +-glusterfs_mux_volfile_reconfigure +-glusterfs_process_svc_detach +-mgmt_is_multiplexed_daemon +diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c +index 0cf80c0..d18b50f 100644 +--- a/libglusterfs/src/statedump.c ++++ b/libglusterfs/src/statedump.c +@@ -810,8 +810,7 @@ gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx) + if (!ctx) + goto out; + +- if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) && +- (ctx && ctx->active)) { ++ if (ctx && ctx->active) { + top = ctx->active->first; + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + brick_count++; +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 022c3ed..6bd4f09 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1470,19 +1470,3 @@ gluster_graph_take_reference(xlator_t *tree) + } + return; + } +- +-gf_boolean_t +-mgmt_is_multiplexed_daemon(char *name) +-{ +- const char *mux_daemons[] = {"glustershd", NULL}; +- int i; +- +- if (!name) +- return _gf_false; +- +- for (i = 0; mux_daemons[i]; i++) { +- if (!strcmp(name, mux_daemons[i])) +- return _gf_true; +- } +- return _gf_false; +-} +diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h +index 7275d75..779878f 100644 +--- a/rpc/rpc-lib/src/protocol-common.h ++++ b/rpc/rpc-lib/src/protocol-common.h +@@ -245,8
+245,6 @@ enum glusterd_brick_procnum { + GLUSTERD_NODE_BITROT, + GLUSTERD_BRICK_ATTACH, + GLUSTERD_DUMP_METRICS, +- GLUSTERD_SVC_ATTACH, +- GLUSTERD_SVC_DETACH, + GLUSTERD_BRICK_MAXVALUE, + }; + +diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t +index 7dae3c3..ca112ad 100644 +--- a/tests/basic/glusterd/heald.t ++++ b/tests/basic/glusterd/heald.t +@@ -7,16 +7,11 @@ + # Covers enable/disable at the moment. Will be enhanced later to include + # the other commands as well. + +-function is_pid_running { +- local pid=$1 +- num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l` +- echo $num +-} +- + cleanup; + TEST glusterd + TEST pidof glusterd + ++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" + #Commands should fail when volume doesn't exist + TEST ! $CLI volume heal non-existent-volume enable + TEST ! $CLI volume heal non-existent-volume disable +@@ -25,55 +20,51 @@ TEST ! $CLI volume heal non-existent-volume disable + # volumes + TEST $CLI volume create dist $H0:$B0/dist + TEST $CLI volume start dist +-TEST "[ -z $(get_shd_process_pid dist)]" ++TEST "[ -z $(get_shd_process_pid)]" + TEST ! $CLI volume heal dist enable + TEST ! $CLI volume heal dist disable + + # Commands should work on replicate/disperse volume. + TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1 +-TEST "[ -z $(get_shd_process_pid r2)]" ++TEST "[ -z $(get_shd_process_pid)]" + TEST $CLI volume start r2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 enable + EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" +-volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol" +-EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2 +-pid=$( get_shd_process_pid r2 ) ++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 disable + EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon +-EXPECT "1" is_pid_running $pid ++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + + # Commands should work on disperse volume. 
+ TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 + TEST $CLI volume start ec2 +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 enable + EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol" +-EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2 +-pid=$(get_shd_process_pid ec2) ++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 disable + EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon +-EXPECT "1" is_pid_running $pid ++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + + #Check that shd graph is rewritten correctly on volume stop/start +-EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse +- +-EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop r2 +-EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop ec2 + # When both the volumes are stopped glustershd volfile is not modified just the + # process is stopped +-TEST "[ -z $(get_shd_process_pid dist) ]" +-TEST "[ -z $(get_shd_process_pid ec2) ]" ++TEST "[ -z $(get_shd_process_pid) ]" + + TEST $CLI volume start r2 +-EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate ++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate + + TEST $CLI volume set r2 self-heal-daemon on + TEST $CLI volume set r2 cluster.self-heal-daemon off +diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +index e6e65c4..cdb1a33 100644 +--- a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t ++++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t +@@ -55,9 +55,9 @@ TEST kill_glusterd 1 + #Bring back 1st glusterd + TEST $glusterd_1 + +-# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started +-#on node 2, because once glusterd regains quorum, it will restart all volume +-#level daemons +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2 ++# We need to wait till PROCESS_UP_TIMEOUT and then check shd service does not ++# come up on node 2 ++sleep $PROCESS_UP_TIMEOUT ++EXPECT "N" shd_up_status_2 + + cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index 022d972..76a8fd4 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -237,13 +237,11 @@ function ec_child_up_count_shd { + } + + function get_shd_process_pid { +- local vol=$1 +- ps auxww | grep "process-name\ glustershd" 
| awk '{print $2}' | head -1 ++ ps auxww | grep glusterfs | grep -E "glustershd/glustershd.pid" | awk '{print $2}' | head -1 + } + + function generate_shd_statedump { +- local vol=$1 +- generate_statedump $(get_shd_process_pid $vol) ++ generate_statedump $(get_shd_process_pid) + } + + function generate_nfs_statedump { +diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am +index 11ae189..5fe5156 100644 +--- a/xlators/mgmt/glusterd/src/Makefile.am ++++ b/xlators/mgmt/glusterd/src/Makefile.am +@@ -18,12 +18,11 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \ + glusterd-snapshot-utils.c glusterd-conn-mgmt.c \ +- glusterd-proc-mgmt.c glusterd-svc-mgmt.c \ ++ glusterd-proc-mgmt.c glusterd-svc-mgmt.c glusterd-shd-svc.c \ + glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \ + glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \ + glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \ + glusterd-reset-brick.c glusterd-tierd-svc.c glusterd-tierd-svc-helper.c \ +- glusterd-shd-svc.c glusterd-shd-svc-helper.c \ + glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c + + +@@ -39,12 +38,11 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ + glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \ + glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \ + glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \ +- glusterd-svc-mgmt.h glusterd-nfs-svc.h \ ++ glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \ + glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \ + glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \ + glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \ + glusterd-tierd-svc.h glusterd-tierd-svc-helper.h \ +- glusterd-shd-svc.h glusterd-shd-svc-helper.h \ + glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \ + $(CONTRIBDIR)/userspace-rcu/rculist-extra.h + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index 042a805..ad9a572 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2863,7 +2863,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr) + } + + if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) { +- ret = glusterd_svcs_reconfigure(volinfo); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +index 16eefa1..c6d7a00 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c +@@ -138,45 +138,3 @@ glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + glusterd_set_socket_filepath(sockfilepath, socketpath, len); + return 0; + } +- +-int +-__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, +- rpc_clnt_event_t event, void *data) +-{ +- glusterd_conf_t *conf = THIS->private; +- glusterd_svc_proc_t *mux_proc = mydata; +- int ret = -1; +- +- /* Silently ignoring this error, exactly like the current +- * implementation */ +- if (!mux_proc) +- return 0; +- +- if (event == RPC_CLNT_DESTROY) { +- 
/*RPC_CLNT_DESTROY will only called after mux_proc detached from the +- * list. So it is safe to call without lock. Processing +- * RPC_CLNT_DESTROY under a lock will lead to deadlock. +- */ +- if (mux_proc->data) { +- glusterd_volinfo_unref(mux_proc->data); +- mux_proc->data = NULL; +- } +- GF_FREE(mux_proc); +- ret = 0; +- } else { +- pthread_mutex_lock(&conf->attach_lock); +- { +- ret = mux_proc->notify(mux_proc, event); +- } +- pthread_mutex_unlock(&conf->attach_lock); +- } +- return ret; +-} +- +-int +-glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, +- rpc_clnt_event_t event, void *data) +-{ +- return glusterd_big_locked_notify(rpc, mydata, event, data, +- __glusterd_muxsvc_conn_common_notify); +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h +index d1c4607..602c0ba 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h +@@ -43,11 +43,9 @@ glusterd_conn_disconnect(glusterd_conn_t *conn); + int + glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); +-int +-glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata, +- rpc_clnt_event_t event, void *data); + + int32_t + glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath, + int len); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c +index b01fd4d..f9c8617 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c +@@ -370,7 +370,6 @@ int + glusterd_gfproxydsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; +- glusterd_volinfo_t *tmp = NULL; + int ret = -1; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -381,7 +380,7 @@ glusterd_gfproxydsvc_restart() + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + +- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) ++ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + /* Start per volume gfproxyd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index ac788a0..cb2666b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -5940,11 +5940,6 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) + + GF_FREE(rebal_data); + +- fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count, +- volinfo->shd.svc.online ? "Online" : "Offline"); +- fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count, +- volinfo->shd.svc.inited ? "True" : "False"); +- + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + ret = glusterd_volume_get_hot_tier_type_str(volinfo, + &hot_tier_type_str); +@@ -6014,6 +6009,12 @@ glusterd_get_state(rpcsvc_request_t *req, dict_t *dict) + + fprintf(fp, "\n[Services]\n"); + ++ if (priv->shd_svc.inited) { ++ fprintf(fp, "svc%d.name: %s\n", ++count, priv->shd_svc.name); ++ fprintf(fp, "svc%d.online_status: %s\n\n", count, ++ priv->shd_svc.online ? 
"Online" : "Offline"); ++ } ++ + if (priv->nfs_svc.inited) { + fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name); + fprintf(fp, "svc%d.online_status: %s\n\n", count, +diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c +index 1ba58c3..5599a63 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c +@@ -30,7 +30,6 @@ + #include "rpcsvc.h" + #include "rpc-common-xdr.h" + #include "glusterd-gfproxyd-svc-helper.h" +-#include "glusterd-shd-svc-helper.h" + + extern struct rpc_clnt_program gd_peer_prog; + extern struct rpc_clnt_program gd_mgmt_prog; +@@ -329,26 +328,6 @@ build_volfile_path(char *volume_id, char *path, size_t path_len, + goto out; + } + +- volid_ptr = strstr(volume_id, "shd/"); +- if (volid_ptr) { +- volid_ptr = strchr(volid_ptr, '/'); +- if (!volid_ptr) { +- ret = -1; +- goto out; +- } +- volid_ptr++; +- +- ret = glusterd_volinfo_find(volid_ptr, &volinfo); +- if (ret == -1) { +- gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo"); +- goto out; +- } +- +- glusterd_svc_build_shd_volfile_path(volinfo, path, path_len); +- ret = 0; +- goto out; +- } +- + volid_ptr = strstr(volume_id, "/snaps/"); + if (volid_ptr) { + ret = get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo); +diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h +index 17052ce..7a784db 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h ++++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h +@@ -51,7 +51,6 @@ typedef enum gf_gld_mem_types_ { + gf_gld_mt_missed_snapinfo_t, + gf_gld_mt_snap_create_args_t, + gf_gld_mt_glusterd_brick_proc_t, +- gf_gld_mt_glusterd_svc_proc_t, + gf_gld_mt_end, + } gf_gld_mem_types_t; + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h +index 424e15f..c7b3ca8 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-messages.h ++++ b/xlators/mgmt/glusterd/src/glusterd-messages.h +@@ -298,8 +298,6 @@ GLFS_MSGID( + GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE, + GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL, + GD_MSG_MANAGER_FUNCTION_FAILED, GD_MSG_NFS_GANESHA_DISABLED, +- GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, +- GD_MSG_SHD_START_FAIL, GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, +- GD_MSG_ATTACH_INFO, GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL); ++ GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 9ea695e..0d29de2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -44,7 +44,6 @@ + #include "glusterd-snapshot-utils.h" + #include "glusterd-svc-mgmt.h" + #include "glusterd-svc-helper.h" +-#include "glusterd-shd-svc-helper.h" + #include "glusterd-shd-svc.h" + #include "glusterd-nfs-svc.h" + #include "glusterd-quotad-svc.h" +@@ -2225,11 +2224,6 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key, + if (ret) + goto out; + +- svc = &(volinfo->shd.svc); +- ret = svc->reconfigure(volinfo); +- if (ret) +- goto out; +- + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -2244,7 +2238,7 @@ glusterd_options_reset(glusterd_volinfo_t *volinfo, char 
*key, + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(volinfo); ++ ret = glusterd_svcs_reconfigure(); + if (ret) + goto out; + } +@@ -2700,11 +2694,6 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + if (ret) + goto out; + +- svc = &(volinfo->shd.svc); +- ret = svc->reconfigure(volinfo); +- if (ret) +- goto out; +- + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -2718,7 +2707,7 @@ glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict, + } + } + if (svcs_reconfigure) { +- ret = glusterd_svcs_reconfigure(NULL); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart " +@@ -3103,11 +3092,6 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + if (ret) + goto out; + +- svc = &(volinfo->shd.svc); +- ret = svc->reconfigure(volinfo); +- if (ret) +- goto out; +- + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -3123,7 +3107,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(volinfo); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); +@@ -3156,11 +3140,6 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + if (ret) + goto out; + +- svc = &(volinfo->shd.svc); +- ret = svc->reconfigure(volinfo); +- if (ret) +- goto out; +- + ret = glusterd_create_volfiles_and_notify_services(volinfo); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, +@@ -3176,7 +3155,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(volinfo); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL, + "Unable to restart services"); +@@ -3383,7 +3362,7 @@ glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { +- ret = glusterd_svcs_reconfigure(volinfo); ++ ret = glusterd_svcs_reconfigure(); + if (ret) + goto out; + } +@@ -3666,6 +3645,14 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + other_count++; + node_count++; + ++ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { ++ ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, 0, ++ vol_opts); ++ if (ret) ++ goto out; ++ other_count++; ++ node_count++; ++ + } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0, + vol_opts); +@@ -3699,12 +3686,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + goto out; + other_count++; + node_count++; +- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { +- ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index); +- if (ret) +- goto out; +- other_count++; +- node_count++; + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick); + if (ret) +@@ -3767,19 +3748,6 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + node_count++; + } + +- if (glusterd_is_shd_compatible_volume(volinfo)) { +- shd_enabled = gd_is_self_heal_enabled(volinfo, 
vol_opts); +- if (shd_enabled) { +- ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, +- other_index); +- if (ret) +- goto out; +- other_count++; +- other_index++; +- node_count++; +- } +- } +- + nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY, + _gf_false); + if (!nfs_disabled) { +@@ -3792,6 +3760,18 @@ glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + node_count++; + } + ++ if (glusterd_is_shd_compatible_volume(volinfo)) ++ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts); ++ if (shd_enabled) { ++ ret = glusterd_add_node_to_dict(priv->shd_svc.name, rsp_dict, ++ other_index, vol_opts); ++ if (ret) ++ goto out; ++ other_count++; ++ node_count++; ++ other_index++; ++ } ++ + if (glusterd_is_volume_quota_enabled(volinfo)) { + ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, + other_index, vol_opts); +@@ -6904,18 +6884,16 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; +- glusterd_svc_t *svc = NULL; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); +- svc = &(volinfo->shd.svc); + + switch (heal_op) { + case GF_SHD_OP_INDEX_SUMMARY: + case GF_SHD_OP_STATISTICS_HEAL_COUNT: +- if (!svc->online) { ++ if (!priv->shd_svc.online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " +@@ -6936,7 +6914,7 @@ glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op, + break; + + case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: +- if (!svc->online) { ++ if (!priv->shd_svc.online) { + if (!rsp_dict) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL, + "Received " +@@ -7071,7 +7049,7 @@ glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr, + ret = -1; + goto out; + } else { +- pending_node->node = &(volinfo->shd.svc); ++ pending_node->node = &(priv->shd_svc); + pending_node->type = GD_NODE_SHD; + cds_list_add_tail(&pending_node->list, selected); + pending_node = NULL; +@@ -7205,7 +7183,6 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + glusterd_pending_node_t *pending_node = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; +- glusterd_svc_t *svc = NULL; + + GF_ASSERT(dict); + +@@ -7301,8 +7278,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + + ret = 0; + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { +- svc = &(volinfo->shd.svc); +- if (!svc->online) { ++ if (!priv->shd_svc.online) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED, + "Self-heal daemon is not running"); +@@ -7314,7 +7290,7 @@ glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr, + ret = -1; + goto out; + } +- pending_node->node = svc; ++ pending_node->node = &(priv->shd_svc); + pending_node->type = GD_NODE_SHD; + pending_node->index = 0; + cds_list_add_tail(&pending_node->list, selected); +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c +deleted file mode 100644 +index 9196758..0000000 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c ++++ /dev/null +@@ -1,140 +0,0 @@ +-/* +- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +- This file is part of GlusterFS. 
+- +- This file is licensed to you under your choice of the GNU Lesser +- General Public License, version 3 or any later version (LGPLv3 or +- later), or the GNU General Public License, version 2 (GPLv2), in all +- cases as published by the Free Software Foundation. +-*/ +- +-#include "glusterd.h" +-#include "glusterd-utils.h" +-#include "glusterd-shd-svc-helper.h" +-#include "glusterd-messages.h" +-#include "glusterd-volgen.h" +- +-void +-glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, +- int path_len) +-{ +- char sockfilepath[PATH_MAX] = { +- 0, +- }; +- char rundir[PATH_MAX] = { +- 0, +- }; +- int32_t len = 0; +- glusterd_conf_t *priv = THIS->private; +- +- if (!priv) +- return; +- +- GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); +- len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir, +- uuid_utoa(MY_UUID)); +- if ((len < 0) || (len >= sizeof(sockfilepath))) { +- sockfilepath[0] = 0; +- } +- +- glusterd_set_socket_filepath(sockfilepath, path, path_len); +-} +- +-void +-glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, +- int path_len) +-{ +- char rundir[PATH_MAX] = { +- 0, +- }; +- glusterd_conf_t *priv = THIS->private; +- +- if (!priv) +- return; +- +- GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); +- +- snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname); +-} +- +-void +-glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, +- int path_len) +-{ +- char workdir[PATH_MAX] = { +- 0, +- }; +- glusterd_conf_t *priv = THIS->private; +- +- if (!priv) +- return; +- +- GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv); +- +- snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname); +-} +- +-void +-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len) +-{ +- snprintf(logdir, len, "%s/shd/%s", DEFAULT_LOG_FILE_DIRECTORY, volname); +-} +- +-void +-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len) +-{ +- snprintf(logfile, len, "%s/shd.log", logdir); +-} +- +-void +-glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd) +-{ +- glusterd_svc_proc_t *svc_proc = NULL; +- glusterd_svc_t *svc = NULL; +- glusterd_conf_t *conf = NULL; +- gf_boolean_t need_unref = _gf_false; +- rpc_clnt_t *rpc = NULL; +- +- conf = THIS->private; +- if (!conf) +- return; +- +- GF_VALIDATE_OR_GOTO(THIS->name, conf, out); +- GF_VALIDATE_OR_GOTO(THIS->name, shd, out); +- +- svc = &shd->svc; +- shd->attached = _gf_false; +- +- if (svc->conn.rpc) { +- rpc_clnt_unref(svc->conn.rpc); +- svc->conn.rpc = NULL; +- } +- +- pthread_mutex_lock(&conf->attach_lock); +- { +- svc_proc = svc->svc_proc; +- svc->svc_proc = NULL; +- svc->inited = _gf_false; +- cds_list_del_init(&svc->mux_svc); +- glusterd_unlink_file(svc->proc.pidfile); +- +- if (svc_proc && cds_list_empty(&svc_proc->svcs)) { +- cds_list_del_init(&svc_proc->svc_proc_list); +- /* We cannot free svc_proc list from here. Because +- * if there are pending events on the rpc, it will +- * try to access the corresponding svc_proc, so unrefing +- * rpc request and then cleaning up the memory is carried +- * from the notify function upon RPC_CLNT_DESTROY destroy. 
+- */ +- need_unref = _gf_true; +- rpc = svc_proc->rpc; +- svc_proc->rpc = NULL; +- } +- } +- pthread_mutex_unlock(&conf->attach_lock); +- /*rpc unref has to be performed outside the lock*/ +- if (need_unref && rpc) +- rpc_clnt_unref(rpc); +-out: +- return; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h +deleted file mode 100644 +index c70702c..0000000 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* +- Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +- This file is part of GlusterFS. +- +- This file is licensed to you under your choice of the GNU Lesser +- General Public License, version 3 or any later version (LGPLv3 or +- later), or the GNU General Public License, version 2 (GPLv2), in all +- cases as published by the Free Software Foundation. +-*/ +- +-#ifndef _GLUSTERD_SHD_SVC_HELPER_H_ +-#define _GLUSTERD_SHD_SVC_HELPER_H_ +- +-#include "glusterd.h" +-#include "glusterd-svc-mgmt.h" +- +-void +-glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path, +- int path_len); +- +-void +-glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path, +- int path_len); +- +-void +-glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path, +- int path_len); +- +-void +-glusterd_svc_build_shd_logdir(char *logdir, char *volname, size_t len); +- +-void +-glusterd_svc_build_shd_logfile(char *logfile, char *logdir, size_t len); +- +-void +-glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd); +- +-int +-glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, +- glusterd_svc_t *svc, int flags); +- +-int +-glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo); +- +-#endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +index 4789843..f5379b0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c +@@ -13,10 +13,9 @@ + #include "glusterd.h" + #include "glusterd-utils.h" + #include "glusterd-volgen.h" ++#include "glusterd-svc-mgmt.h" + #include "glusterd-shd-svc.h" +-#include "glusterd-shd-svc-helper.h" + #include "glusterd-svc-helper.h" +-#include "glusterd-store.h" + + #define GD_SHD_PROCESS_NAME "--process-name" + char *shd_svc_name = "glustershd"; +@@ -24,145 +23,27 @@ char *shd_svc_name = "glustershd"; + void + glusterd_shdsvc_build(glusterd_svc_t *svc) + { +- int ret = -1; +- ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); +- if (ret < 0) +- return; +- +- CDS_INIT_LIST_HEAD(&svc->mux_svc); + svc->manager = glusterd_shdsvc_manager; + svc->start = glusterd_shdsvc_start; +- svc->stop = glusterd_shdsvc_stop; +- svc->reconfigure = glusterd_shdsvc_reconfigure; ++ svc->stop = glusterd_svc_stop; + } + + int +-glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, +- glusterd_svc_proc_t *mux_svc) ++glusterd_shdsvc_init(glusterd_svc_t *svc) + { +- int ret = -1; +- char rundir[PATH_MAX] = { +- 0, +- }; +- char sockpath[PATH_MAX] = { +- 0, +- }; +- char pidfile[PATH_MAX] = { +- 0, +- }; +- char volfile[PATH_MAX] = { +- 0, +- }; +- char logdir[PATH_MAX] = { +- 0, +- }; +- char logfile[PATH_MAX] = { +- 0, +- }; +- char volfileid[256] = {0}; +- glusterd_svc_t *svc = NULL; +- glusterd_volinfo_t *volinfo = NULL; +- glusterd_conf_t *priv = NULL; +- glusterd_muxsvc_conn_notify_t notify = NULL; +- xlator_t *this = NULL; +- char *volfileserver = NULL; +- int32_t len = 0; +- +- this = THIS; 
+- GF_VALIDATE_OR_GOTO(THIS->name, this, out); +- +- priv = this->private; +- GF_VALIDATE_OR_GOTO(this->name, priv, out); +- +- volinfo = data; +- GF_VALIDATE_OR_GOTO(this->name, data, out); +- GF_VALIDATE_OR_GOTO(this->name, mux_svc, out); +- +- svc = &(volinfo->shd.svc); +- +- ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name); +- if (ret < 0) +- goto out; +- +- notify = glusterd_muxsvc_common_rpc_notify; +- glusterd_store_perform_node_state_store(volinfo); +- +- GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv); +- glusterd_svc_create_rundir(rundir); +- +- glusterd_svc_build_shd_logdir(logdir, volinfo->volname, sizeof(logdir)); +- glusterd_svc_build_shd_logfile(logfile, logdir, sizeof(logfile)); +- +- /* Initialize the connection mgmt */ +- if (mux_conn && mux_svc->rpc) { +- /* multiplexed svc */ +- svc->conn.frame_timeout = mux_conn->frame_timeout; +- /* This will be unrefed from glusterd_shd_svcproc_cleanup*/ +- svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc); +- ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s", +- mux_conn->sockpath); +- } else { +- ret = mkdir_p(logdir, 0755, _gf_true); +- if ((ret == -1) && (EEXIST != errno)) { +- gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, +- "Unable to create logdir %s", logdir); +- goto out; +- } +- +- glusterd_svc_build_shd_socket_filepath(volinfo, sockpath, +- sizeof(sockpath)); +- ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600, +- notify); +- if (ret) +- goto out; +- /* This will be unrefed when the last svcs is detached from the list */ +- if (!mux_svc->rpc) +- mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc); +- } +- +- /* Initialize the process mgmt */ +- glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile)); +- glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX); +- len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname); +- if ((len < 0) || (len >= sizeof(volfileid))) { +- ret = -1; +- goto out; +- } +- +- if (dict_get_strn(this->options, "transport.socket.bind-address", +- SLEN("transport.socket.bind-address"), +- &volfileserver) != 0) { +- volfileserver = "localhost"; +- } +- ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir, +- logfile, volfile, volfileid, volfileserver); +- if (ret) +- goto out; +- +-out: +- gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); +- return ret; ++ return glusterd_svc_init(svc, shd_svc_name); + } + +-int +-glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) ++static int ++glusterd_shdsvc_create_volfile() + { + char filepath[PATH_MAX] = { + 0, + }; +- + int ret = -1; ++ glusterd_conf_t *conf = THIS->private; + dict_t *mod_dict = NULL; + +- glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX); +- if (!glusterd_is_shd_compatible_volume(volinfo)) { +- /* If volfile exist, delete it. This case happens when we +- * change from replica/ec to distribute. 
+- */ +- (void)glusterd_unlink_file(filepath); +- ret = 0; +- goto out; +- } + mod_dict = dict_new(); + if (!mod_dict) + goto out; +@@ -183,7 +64,9 @@ glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + +- ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict); ++ glusterd_svc_build_volfile_path(shd_svc_name, conf->workdir, filepath, ++ sizeof(filepath)); ++ ret = glusterd_create_global_volfile(build_shd_graph, filepath, mod_dict); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL, + "Failed to create volfile"); +@@ -198,89 +81,26 @@ out: + return ret; + } + +-gf_boolean_t +-glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc) +-{ +- glusterd_svc_proc_t *svc_proc = NULL; +- glusterd_shdsvc_t *shd = NULL; +- glusterd_svc_t *temp_svc = NULL; +- glusterd_volinfo_t *volinfo = NULL; +- gf_boolean_t comp = _gf_false; +- glusterd_conf_t *conf = THIS->private; +- +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- pthread_mutex_lock(&conf->attach_lock); +- { +- svc_proc = svc->svc_proc; +- if (!svc_proc) +- goto unlock; +- cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc) +- { +- /* Get volinfo->shd from svc object */ +- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); +- if (!shd) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, +- "Failed to get shd object " +- "from shd service"); +- goto unlock; +- } +- +- /* Get volinfo from shd */ +- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); +- if (!volinfo) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to get volinfo from " +- "from shd"); +- goto unlock; +- } +- if (!glusterd_is_shd_compatible_volume(volinfo)) +- continue; +- if (volinfo->status == GLUSTERD_STATUS_STARTED) +- goto unlock; +- } +- comp = _gf_true; +- } +-unlock: +- pthread_mutex_unlock(&conf->attach_lock); +-out: +- return comp; +-} +- + int + glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + { +- int ret = -1; ++ int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + +- volinfo = data; +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); +- +- if (volinfo) +- glusterd_volinfo_ref(volinfo); +- +- ret = glusterd_shdsvc_create_volfile(volinfo); +- if (ret) +- goto out; +- +- if (!glusterd_is_shd_compatible_volume(volinfo)) { +- ret = 0; +- if (svc->inited) { +- /* This means glusterd was running for this volume and now +- * it was converted to a non-shd volume. 
So just stop the shd +- */ +- ret = svc->stop(svc, SIGTERM); ++ if (!svc->inited) { ++ ret = glusterd_shdsvc_init(svc); ++ if (ret) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, ++ "Failed to init shd " ++ "service"); ++ goto out; ++ } else { ++ svc->inited = _gf_true; ++ gf_msg_debug(THIS->name, 0, "shd service initialized"); + } +- goto out; + } + +- ret = glusterd_shd_svc_mux_init(volinfo, svc); +- if (ret) { +- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, +- "Failed to init shd service"); +- goto out; +- } ++ volinfo = data; + + /* If all the volumes are stopped or all shd compatible volumes + * are stopped then stop the service if: +@@ -290,26 +110,31 @@ glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags) + * - volinfo is NULL or + * - volinfo is present and volume is shd compatible + */ +- if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) { +- /* TODO +- * Take a lock and detach all svc's to stop the process +- * also reset the init flag +- */ +- ret = svc->stop(svc, SIGTERM); +- } else if (volinfo) { +- ret = svc->stop(svc, SIGTERM); +- if (ret) +- goto out; ++ if (glusterd_are_all_volumes_stopped() || ++ glusterd_all_shd_compatible_volumes_stopped()) { ++ if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) { ++ ret = svc->stop(svc, SIGTERM); ++ } ++ } else { ++ if (!(volinfo && !glusterd_is_shd_compatible_volume(volinfo))) { ++ ret = glusterd_shdsvc_create_volfile(); ++ if (ret) ++ goto out; ++ ++ ret = svc->stop(svc, SIGTERM); ++ if (ret) ++ goto out; + +- if (volinfo->status == GLUSTERD_STATUS_STARTED) { + ret = svc->start(svc, flags); + if (ret) + goto out; ++ ++ ret = glusterd_conn_connect(&(svc->conn)); ++ if (ret) ++ goto out; + } + } + out: +- if (volinfo) +- glusterd_volinfo_unref(volinfo); + if (ret) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); +@@ -318,7 +143,7 @@ out: + } + + int +-glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) ++glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) + { + int ret = -1; + char glusterd_uuid_option[PATH_MAX] = {0}; +@@ -363,136 +188,31 @@ glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags) + goto out; + + ret = glusterd_svc_start(svc, flags, cmdline); +- if (ret) +- goto out; + +- ret = glusterd_conn_connect(&(svc->conn)); + out: + if (cmdline) + dict_unref(cmdline); +- return ret; +-} + +-int +-glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo, +- glusterd_svc_t *svc, int flags) +-{ +- int ret = -1; +- glusterd_svc_proc_t *mux_proc = NULL; +- glusterd_conf_t *conf = NULL; +- +- conf = THIS->private; +- +- if (!conf || !volinfo || !svc) +- return -1; +- glusterd_shd_svcproc_cleanup(&volinfo->shd); +- mux_proc = glusterd_svcprocess_new(); +- if (!mux_proc) { +- return -1; +- } +- ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc); +- if (ret) +- return -1; +- pthread_mutex_lock(&conf->attach_lock); +- { +- cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); +- svc->svc_proc = mux_proc; +- cds_list_del_init(&svc->mux_svc); +- cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); +- } +- pthread_mutex_unlock(&conf->attach_lock); +- +- ret = glusterd_new_shd_svc_start(svc, flags); +- if (!ret) { +- volinfo->shd.attached = _gf_true; +- } +- return ret; +-} +- +-int +-glusterd_shdsvc_start(glusterd_svc_t *svc, int flags) +-{ +- int ret = -1; +- glusterd_shdsvc_t *shd = NULL; +- glusterd_volinfo_t *volinfo = NULL; +- glusterd_conf_t *conf = NULL; +- +- 
GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- conf = THIS->private; +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- +- /* Get volinfo->shd from svc object */ +- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); +- if (!shd) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, +- "Failed to get shd object " +- "from shd service"); +- return -1; +- } +- +- /* Get volinfo from shd */ +- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); +- if (!volinfo) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to get volinfo from " +- "from shd"); +- return -1; +- } +- +- if (volinfo->status != GLUSTERD_STATUS_STARTED) +- return -1; +- +- glusterd_volinfo_ref(volinfo); +- if (!svc->inited) { +- ret = glusterd_shd_svc_mux_init(volinfo, svc); +- if (ret) +- goto out; +- } +- +- if (shd->attached) { +- ret = glusterd_attach_svc(svc, volinfo, flags); +- if (ret) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to attach shd svc(volume=%s) to pid=%d. Starting" +- "a new process", +- volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- ret = glusterd_recover_shd_attach_failure(volinfo, svc, flags); +- } +- goto out; +- } +- ret = glusterd_new_shd_svc_start(svc, flags); +- if (!ret) { +- shd->attached = _gf_true; +- } +-out: +- if (volinfo) +- glusterd_volinfo_unref(volinfo); + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + + return ret; + } + + int +-glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) ++glusterd_shdsvc_reconfigure() + { + int ret = -1; + xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; + gf_boolean_t identical = _gf_false; +- dict_t *mod_dict = NULL; +- glusterd_svc_t *svc = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + +- if (!volinfo) { +- /* reconfigure will be called separately*/ +- ret = 0; +- goto out; +- } ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); + +- glusterd_volinfo_ref(volinfo); +- svc = &(volinfo->shd.svc); +- if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) ++ if (glusterd_all_shd_compatible_volumes_stopped()) + goto manager; + + /* +@@ -500,42 +220,8 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ +- +- if (!glusterd_is_shd_compatible_volume(volinfo)) { +- if (svc->inited) +- goto manager; +- +- /* Nothing to do if not shd compatible */ +- ret = 0; +- goto out; +- } +- mod_dict = dict_new(); +- if (!mod_dict) +- goto out; +- +- ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0); +- if (ret) +- goto out; +- +- ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on"); +- if (ret) +- goto out; +- +- ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on"); +- if (ret) +- goto out; +- +- ret = dict_set_int32(mod_dict, "graph-check", 1); +- if (ret) +- goto out; +- +- ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on"); +- if (ret) +- goto out; +- +- ret = glusterd_volume_svc_check_volfile_identical( +- "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, +- &identical); ++ ret = glusterd_svc_check_volfile_identical(priv->shd_svc.name, ++ build_shd_graph, &identical); + if (ret) + goto out; + +@@ -550,9 +236,8 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) + * changed, then inform the xlator to reconfigure the options. 
+ */ + identical = _gf_false; /* RESET the FLAG */ +- ret = glusterd_volume_svc_check_topology_identical( +- "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile, +- &identical); ++ ret = glusterd_svc_check_topology_identical(priv->shd_svc.name, ++ build_shd_graph, &identical); + if (ret) + goto out; + +@@ -560,7 +245,7 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) + * options to shd volfile, so that shd will be reconfigured. + */ + if (identical) { +- ret = glusterd_shdsvc_create_volfile(volinfo); ++ ret = glusterd_shdsvc_create_volfile(); + if (ret == 0) { /* Only if above PASSES */ + ret = glusterd_fetchspec_notify(THIS); + } +@@ -568,129 +253,12 @@ glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo) + } + manager: + /* +- * shd volfile's topology has been changed. volfile needs +- * to be RECONFIGURED to ACT on the changed volfile. ++ * shd volfile's topology has been changed. shd server needs ++ * to be RESTARTED to ACT on the changed volfile. + */ +- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); ++ ret = priv->shd_svc.manager(&(priv->shd_svc), NULL, PROC_START_NO_WAIT); + + out: +- if (volinfo) +- glusterd_volinfo_unref(volinfo); +- if (mod_dict) +- dict_unref(mod_dict); + gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret); + return ret; + } +- +-int +-glusterd_shdsvc_restart() +-{ +- glusterd_volinfo_t *volinfo = NULL; +- glusterd_volinfo_t *tmp = NULL; +- int ret = -1; +- xlator_t *this = THIS; +- glusterd_conf_t *conf = NULL; +- glusterd_svc_t *svc = NULL; +- +- GF_VALIDATE_OR_GOTO("glusterd", this, out); +- +- conf = this->private; +- GF_VALIDATE_OR_GOTO(this->name, conf, out); +- +- pthread_mutex_lock(&conf->volume_lock); +- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) +- { +- glusterd_volinfo_ref(volinfo); +- pthread_mutex_unlock(&conf->volume_lock); +- /* Start per volume shd svc */ +- if (volinfo->status == GLUSTERD_STATUS_STARTED) { +- svc = &(volinfo->shd.svc); +- ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL, +- "Couldn't start shd for " +- "vol: %s on restart", +- volinfo->volname); +- gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s", +- volinfo->volname, svc->name); +- glusterd_volinfo_unref(volinfo); +- goto out; +- } +- } +- glusterd_volinfo_unref(volinfo); +- pthread_mutex_lock(&conf->volume_lock); +- } +- pthread_mutex_unlock(&conf->volume_lock); +-out: +- return ret; +-} +- +-int +-glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig) +-{ +- int ret = -1; +- glusterd_svc_proc_t *svc_proc = NULL; +- glusterd_shdsvc_t *shd = NULL; +- glusterd_volinfo_t *volinfo = NULL; +- gf_boolean_t empty = _gf_false; +- glusterd_conf_t *conf = NULL; +- int pid = -1; +- +- conf = THIS->private; +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- svc_proc = svc->svc_proc; +- GF_VALIDATE_OR_GOTO("glusterd", svc_proc, out); +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- +- /* Get volinfo->shd from svc object */ +- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); +- if (!shd) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, +- "Failed to get shd object " +- "from shd service"); +- return -1; +- } +- +- /* Get volinfo from shd */ +- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); +- if (!volinfo) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to get volinfo from " +- "from shd"); +- return -1; +- } +- +- glusterd_volinfo_ref(volinfo); +- 
pthread_mutex_lock(&conf->attach_lock); +- { +- gf_is_service_running(svc->proc.pidfile, &pid); +- cds_list_del_init(&svc->mux_svc); +- empty = cds_list_empty(&svc_proc->svcs); +- } +- pthread_mutex_unlock(&conf->attach_lock); +- if (empty) { +- /* Unref will happen when destroying the connection */ +- glusterd_volinfo_ref(volinfo); +- svc_proc->data = volinfo; +- ret = glusterd_svc_stop(svc, sig); +- } +- if (!empty && pid != -1) { +- ret = glusterd_detach_svc(svc, volinfo, sig); +- if (ret) +- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, +- "shd service is failed to detach volume %s from pid %d", +- volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- else +- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS, +- "Shd service is detached for volume %s from pid %d", +- volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- } +- svc->online = _gf_false; +- (void)glusterd_unlink_file((char *)svc->proc.pidfile); +- glusterd_shd_svcproc_cleanup(shd); +- ret = 0; +- glusterd_volinfo_unref(volinfo); +-out: +- gf_msg_debug(THIS->name, 0, "Returning %d", ret); +- return ret; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h +index 55b409f..775a9d4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h ++++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h +@@ -12,20 +12,12 @@ + #define _GLUSTERD_SHD_SVC_H_ + + #include "glusterd-svc-mgmt.h" +-#include "glusterd.h" +- +-typedef struct glusterd_shdsvc_ glusterd_shdsvc_t; +-struct glusterd_shdsvc_ { +- glusterd_svc_t svc; +- gf_boolean_t attached; +-}; + + void + glusterd_shdsvc_build(glusterd_svc_t *svc); + + int +-glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn, +- glusterd_svc_proc_t *svc_proc); ++glusterd_shdsvc_init(glusterd_svc_t *svc); + + int + glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags); +@@ -35,11 +27,4 @@ glusterd_shdsvc_start(glusterd_svc_t *svc, int flags); + + int + glusterd_shdsvc_reconfigure(); +- +-int +-glusterd_shdsvc_restart(); +- +-int +-glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig); +- + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 943b1c6..54a7bd1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -748,16 +748,6 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv) + } + } + +- if (glusterd_is_shd_compatible_volume(volinfo)) { +- svc = &(volinfo->shd.svc); +- ret = svc->stop(svc, SIGTERM); +- if (ret) { +- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, +- "Failed " +- "to stop shd daemon service"); +- } +- } +- + if (glusterd_is_gfproxyd_enabled(volinfo)) { + svc = &(volinfo->gfproxyd.svc); + ret = svc->stop(svc, SIGTERM); +@@ -785,7 +775,7 @@ glusterd_peer_detach_cleanup(glusterd_conf_t *priv) + } + + /*Reconfigure all daemon services upon peer detach*/ +- ret = glusterd_svcs_reconfigure(NULL); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +index 1da4076..56bab07 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c +@@ -366,7 +366,6 @@ int + glusterd_snapdsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; +- glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + 
glusterd_conf_t *conf = NULL; +@@ -377,7 +376,7 @@ glusterd_snapdsvc_restart() + conf = this->private; + GF_ASSERT(conf); + +- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) ++ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + /* Start per volume snapd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c +index 69d4cf4..f5ecde7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-statedump.c ++++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c +@@ -202,6 +202,9 @@ glusterd_dump_priv(xlator_t *this) + gf_proc_dump_build_key(key, "glusterd", "ping-timeout"); + gf_proc_dump_write(key, "%d", priv->ping_timeout); + ++ gf_proc_dump_build_key(key, "glusterd", "shd.online"); ++ gf_proc_dump_write(key, "%d", priv->shd_svc.online); ++ + gf_proc_dump_build_key(key, "glusterd", "nfs.online"); + gf_proc_dump_write(key, "%d", priv->nfs_svc.online); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index e42703c..ca19a75 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -7,7 +7,6 @@ + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ +-#include <signal.h> + + #include <glusterfs/globals.h> + #include <glusterfs/run.h> +@@ -21,14 +20,12 @@ + #include "glusterd-bitd-svc.h" + #include "glusterd-tierd-svc.h" + #include "glusterd-tierd-svc-helper.h" +-#include "glusterd-shd-svc-helper.h" + #include "glusterd-scrub-svc.h" + #include "glusterd-svc-helper.h" + #include <glusterfs/syscall.h> +-#include "glusterd-snapshot-utils.h" + + int +-glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo) ++glusterd_svcs_reconfigure() + { + int ret = 0; + xlator_t *this = THIS; +@@ -46,11 +43,9 @@ glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo) + goto out; + + svc_name = "self-heald"; +- if (volinfo) { +- ret = glusterd_shdsvc_reconfigure(volinfo); +- if (ret) +- goto out; +- } ++ ret = glusterd_shdsvc_reconfigure(); ++ if (ret) ++ goto out; + + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; +@@ -74,7 +69,7 @@ out: + } + + int +-glusterd_svcs_stop(glusterd_volinfo_t *volinfo) ++glusterd_svcs_stop() + { + int ret = 0; + xlator_t *this = NULL; +@@ -90,15 +85,13 @@ glusterd_svcs_stop(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + +- ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); ++ ret = glusterd_svc_stop(&(priv->shd_svc), SIGTERM); + if (ret) + goto out; + +- if (volinfo) { +- ret = glusterd_svc_stop(&(volinfo->shd.svc), PROC_START_NO_WAIT); +- if (ret) +- goto out; +- } ++ ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM); ++ if (ret) ++ goto out; + + ret = glusterd_svc_stop(&(priv->bitd_svc), SIGTERM); + if (ret) +@@ -128,6 +121,12 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + ++ ret = conf->shd_svc.manager(&(conf->shd_svc), volinfo, PROC_START_NO_WAIT); ++ if (ret == -EINVAL) ++ ret = 0; ++ if (ret) ++ goto out; ++ + if (conf->op_version == GD_OP_VERSION_MIN) + goto out; + +@@ -144,15 +143,6 @@ glusterd_svcs_manager(glusterd_volinfo_t *volinfo) + if (ret) + goto out; + +- if (volinfo) { +- ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo, +- PROC_START_NO_WAIT); +- if (ret == -EINVAL) +- ret = 0; +- if (ret) +- goto out; +- } +- + ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, 
PROC_START_NO_WAIT); + if (ret == -EINVAL) + ret = 0; +@@ -279,678 +269,3 @@ out: + GF_FREE(tmpvol); + return ret; + } +- +-int +-glusterd_volume_svc_check_volfile_identical( +- char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, +- glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) +-{ +- char orgvol[PATH_MAX] = { +- 0, +- }; +- char *tmpvol = NULL; +- xlator_t *this = NULL; +- int ret = -1; +- int need_unlink = 0; +- int tmp_fd = -1; +- +- this = THIS; +- +- GF_VALIDATE_OR_GOTO(this->name, this, out); +- GF_VALIDATE_OR_GOTO(this->name, identical, out); +- +- /* This builds volfile for volume level dameons */ +- glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, +- sizeof(orgvol)); +- +- ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); +- if (ret < 0) { +- goto out; +- } +- +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ +- tmp_fd = mkstemp(tmpvol); +- if (tmp_fd < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +- "Unable to create temp file" +- " %s:(%s)", +- tmpvol, strerror(errno)); +- ret = -1; +- goto out; +- } +- +- need_unlink = 1; +- +- ret = builder(volinfo, tmpvol, mode_dict); +- if (ret) +- goto out; +- +- ret = glusterd_check_files_identical(orgvol, tmpvol, identical); +-out: +- if (need_unlink) +- sys_unlink(tmpvol); +- +- if (tmpvol != NULL) +- GF_FREE(tmpvol); +- +- if (tmp_fd >= 0) +- sys_close(tmp_fd); +- +- return ret; +-} +- +-int +-glusterd_volume_svc_check_topology_identical( +- char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo, +- glusterd_vol_graph_builder_t builder, gf_boolean_t *identical) +-{ +- char orgvol[PATH_MAX] = { +- 0, +- }; +- char *tmpvol = NULL; +- glusterd_conf_t *conf = NULL; +- xlator_t *this = THIS; +- int ret = -1; +- int tmpclean = 0; +- int tmpfd = -1; +- +- if ((!identical) || (!this) || (!this->private)) +- goto out; +- +- conf = this->private; +- GF_VALIDATE_OR_GOTO(this->name, conf, out); +- +- /* This builds volfile for volume level dameons */ +- glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol, +- sizeof(orgvol)); +- /* Create the temporary volfile */ +- ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name); +- if (ret < 0) { +- goto out; +- } +- +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ +- tmpfd = mkstemp(tmpvol); +- if (tmpfd < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +- "Unable to create temp file" +- " %s:(%s)", +- tmpvol, strerror(errno)); +- ret = -1; +- goto out; +- } +- +- tmpclean = 1; /* SET the flag to unlink() tmpfile */ +- +- ret = builder(volinfo, tmpvol, mode_dict); +- if (ret) +- goto out; +- +- /* Compare the topology of volfiles */ +- ret = glusterd_check_topology_identical(orgvol, tmpvol, identical); +-out: +- if (tmpfd >= 0) +- sys_close(tmpfd); +- if (tmpclean) +- sys_unlink(tmpvol); +- if (tmpvol != NULL) +- GF_FREE(tmpvol); +- return ret; +-} +- +-void * +-__gf_find_compatible_svc(gd_node_type daemon) +-{ +- glusterd_svc_proc_t *svc_proc = NULL; +- glusterd_svc_proc_t *return_proc = NULL; +- glusterd_svc_t *parent_svc = NULL; +- struct cds_list_head *svc_procs = NULL; +- glusterd_conf_t *conf = NULL; +- int pid = -1; +- +- conf = THIS->private; +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- +- if (daemon == GD_NODE_SHD) { +- svc_procs = &conf->shd_procs; +- if (!svc_procs) +- goto out; +- } +- +- cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) +- { +- parent_svc = cds_list_entry(svc_proc->svcs.next, 
glusterd_svc_t, +- mux_svc); +- if (!return_proc) +- return_proc = svc_proc; +- +- /* If there is an already running shd daemons, select it. Otehrwise +- * select the first one. +- */ +- if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid)) +- return (void *)svc_proc; +- /* +- * Logic to select one process goes here. Currently there is only one +- * shd_proc. So selecting the first one; +- */ +- } +-out: +- return return_proc; +-} +- +-glusterd_svc_proc_t * +-glusterd_svcprocess_new() +-{ +- glusterd_svc_proc_t *new_svcprocess = NULL; +- +- new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess), +- gf_gld_mt_glusterd_svc_proc_t); +- +- if (!new_svcprocess) +- return NULL; +- +- CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list); +- CDS_INIT_LIST_HEAD(&new_svcprocess->svcs); +- new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify; +- return new_svcprocess; +-} +- +-int +-glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc) +-{ +- int ret = -1; +- glusterd_svc_proc_t *mux_proc = NULL; +- glusterd_conn_t *mux_conn = NULL; +- glusterd_conf_t *conf = NULL; +- glusterd_svc_t *parent_svc = NULL; +- int pid = -1; +- +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); +- conf = THIS->private; +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- +- pthread_mutex_lock(&conf->attach_lock); +- { +- if (!svc->inited) { +- if (gf_is_service_running(svc->proc.pidfile, &pid)) { +- /* Just connect is required, but we don't know what happens +- * during the disconnect. So better to reattach. +- */ +- mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid); +- } +- +- if (!mux_proc) { +- if (pid != -1 && sys_access(svc->proc.pidfile, R_OK) == 0) { +- /* stale pid file, unlink it. 
*/ +- kill(pid, SIGTERM); +- sys_unlink(svc->proc.pidfile); +- } +- mux_proc = __gf_find_compatible_svc(GD_NODE_SHD); +- } +- if (mux_proc) { +- /* Take first entry from the process */ +- parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t, +- mux_svc); +- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); +- mux_conn = &parent_svc->conn; +- if (volinfo) +- volinfo->shd.attached = _gf_true; +- } else { +- mux_proc = glusterd_svcprocess_new(); +- if (!mux_proc) { +- ret = -1; +- goto unlock; +- } +- cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs); +- } +- svc->svc_proc = mux_proc; +- cds_list_del_init(&svc->mux_svc); +- cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs); +- ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc); +- if (ret) { +- pthread_mutex_unlock(&conf->attach_lock); +- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC, +- "Failed to init shd " +- "service"); +- goto out; +- } +- gf_msg_debug(THIS->name, 0, "shd service initialized"); +- svc->inited = _gf_true; +- } +- ret = 0; +- } +-unlock: +- pthread_mutex_unlock(&conf->attach_lock); +-out: +- return ret; +-} +- +-void * +-__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid) +-{ +- glusterd_svc_proc_t *svc_proc = NULL; +- struct cds_list_head *svc_procs = NULL; +- glusterd_svc_t *svc = NULL; +- pid_t mux_pid = -1; +- glusterd_conf_t *conf = NULL; +- +- conf = THIS->private; +- if (!conf) +- return NULL; +- +- if (daemon == GD_NODE_SHD) { +- svc_procs = &conf->shd_procs; +- if (!svc_proc) +- return NULL; +- } /* Can be moved to switch when mux is implemented for other daemon; */ +- +- cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list) +- { +- cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc) +- { +- if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) { +- if (mux_pid == pid) { +- /*TODO +- * inefficient loop, but at the moment, there is only +- * one shd. 
+- */ +- return svc_proc; +- } +- } +- } +- } +- return NULL; +-} +- +-static int32_t +-my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) +-{ +- call_frame_t *frame = v_frame; +- xlator_t *this = NULL; +- glusterd_conf_t *conf = NULL; +- +- GF_VALIDATE_OR_GOTO("glusterd", frame, out); +- this = frame->this; +- GF_VALIDATE_OR_GOTO("glusterd", this, out); +- conf = this->private; +- GF_VALIDATE_OR_GOTO(this->name, conf, out); +- +- GF_ATOMIC_DEC(conf->blockers); +- +- STACK_DESTROY(frame->root); +-out: +- return 0; +-} +- +-static int32_t +-glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count, +- void *v_frame) +-{ +- call_frame_t *frame = v_frame; +- glusterd_volinfo_t *volinfo = NULL; +- glusterd_shdsvc_t *shd = NULL; +- glusterd_svc_t *svc = frame->cookie; +- glusterd_svc_t *parent_svc = NULL; +- glusterd_svc_proc_t *mux_proc = NULL; +- glusterd_conf_t *conf = NULL; +- int *flag = (int *)frame->local; +- xlator_t *this = THIS; +- int pid = -1; +- int ret = -1; +- gf_getspec_rsp rsp = { +- 0, +- }; +- +- GF_VALIDATE_OR_GOTO("glusterd", this, out); +- conf = this->private; +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- GF_VALIDATE_OR_GOTO("glusterd", frame, out); +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- +- frame->local = NULL; +- frame->cookie = NULL; +- +- if (!strcmp(svc->name, "glustershd")) { +- /* Get volinfo->shd from svc object */ +- shd = cds_list_entry(svc, glusterd_shdsvc_t, svc); +- if (!shd) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL, +- "Failed to get shd object " +- "from shd service"); +- goto out; +- } +- +- /* Get volinfo from shd */ +- volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd); +- if (!volinfo) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, +- "Failed to get volinfo from " +- "from shd"); +- goto out; +- } +- } +- +- if (!iov) { +- gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, +- "iov is NULL"); +- ret = -1; +- goto out; +- } +- +- ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp); +- if (ret < 0) { +- gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, +- "XDR decoding error"); +- ret = -1; +- goto out; +- } +- +- if (rsp.op_ret == 0) { +- pthread_mutex_lock(&conf->attach_lock); +- { +- if (!strcmp(svc->name, "glustershd")) { +- mux_proc = svc->svc_proc; +- if (mux_proc && +- !gf_is_service_running(svc->proc.pidfile, &pid)) { +- /* +- * When svc's are restarting, there is a chance that the +- * attached svc might not have updated it's pid. Because +- * it was at connection stage. So in that case, we need +- * to retry the pid file copy. +- */ +- parent_svc = cds_list_entry(mux_proc->svcs.next, +- glusterd_svc_t, mux_svc); +- if (parent_svc) +- sys_link(parent_svc->proc.pidfile, svc->proc.pidfile); +- } +- } +- svc->online = _gf_true; +- } +- pthread_mutex_unlock(&conf->attach_lock); +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL, +- "svc %s of volume %s attached successfully to pid %d", svc->name, +- volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- } else { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, +- "svc %s of volume %s failed to " +- "attach to pid %d. 
Starting a new process", +- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- if (!strcmp(svc->name, "glustershd")) { +- glusterd_recover_shd_attach_failure(volinfo, svc, *flag); +- } +- } +-out: +- if (flag) { +- GF_FREE(flag); +- } +- GF_ATOMIC_DEC(conf->blockers); +- STACK_DESTROY(frame->root); +- return 0; +-} +- +-extern size_t +-build_volfile_path(char *volume_id, char *path, size_t path_len, +- char *trusted_str); +- +-int +-__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags, +- struct rpc_clnt *rpc, char *volfile_id, +- int op) +-{ +- int ret = -1; +- struct iobuf *iobuf = NULL; +- struct iobref *iobref = NULL; +- struct iovec iov = { +- 0, +- }; +- char path[PATH_MAX] = { +- '\0', +- }; +- struct stat stbuf = { +- 0, +- }; +- int32_t spec_fd = -1; +- size_t file_len = -1; +- char *volfile_content = NULL; +- ssize_t req_size = 0; +- call_frame_t *frame = NULL; +- gd1_mgmt_brick_op_req brick_req; +- void *req = &brick_req; +- void *errlbl = &&err; +- struct rpc_clnt_connection *conn; +- xlator_t *this = THIS; +- glusterd_conf_t *conf = THIS->private; +- extern struct rpc_clnt_program gd_brick_prog; +- fop_cbk_fn_t cbkfn = my_callback; +- +- if (!rpc) { +- gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL, +- "called with null rpc"); +- return -1; +- } +- +- conn = &rpc->conn; +- if (!conn->connected || conn->disconnected) { +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED, +- "not connected yet"); +- return -1; +- } +- +- brick_req.op = op; +- brick_req.name = volfile_id; +- brick_req.input.input_val = NULL; +- brick_req.input.input_len = 0; +- +- frame = create_frame(this, this->ctx->pool); +- if (!frame) { +- goto *errlbl; +- } +- +- if (op == GLUSTERD_SVC_ATTACH) { +- (void)build_volfile_path(volfile_id, path, sizeof(path), NULL); +- +- ret = sys_stat(path, &stbuf); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, +- "Unable to stat %s (%s)", path, strerror(errno)); +- ret = -EINVAL; +- goto *errlbl; +- } +- +- file_len = stbuf.st_size; +- volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char); +- if (!volfile_content) { +- ret = -ENOMEM; +- goto *errlbl; +- } +- spec_fd = open(path, O_RDONLY); +- if (spec_fd < 0) { +- gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, +- "failed to read volfile %s", path); +- ret = -EIO; +- goto *errlbl; +- } +- ret = sys_read(spec_fd, volfile_content, file_len); +- if (ret == file_len) { +- brick_req.input.input_val = volfile_content; +- brick_req.input.input_len = file_len; +- } else { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL, +- "read failed on path %s. File size=%" GF_PRI_SIZET +- "read size=%d", +- path, file_len, ret); +- ret = -EIO; +- goto *errlbl; +- } +- +- frame->cookie = svc; +- frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int); +- *((int *)frame->local) = flags; +- cbkfn = glusterd_svc_attach_cbk; +- } +- +- req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req); +- iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size); +- if (!iobuf) { +- goto *errlbl; +- } +- errlbl = &&maybe_free_iobuf; +- +- iov.iov_base = iobuf->ptr; +- iov.iov_len = iobuf_pagesize(iobuf); +- +- iobref = iobref_new(); +- if (!iobref) { +- goto *errlbl; +- } +- errlbl = &&free_iobref; +- +- iobref_add(iobref, iobuf); +- /* +- * Drop our reference to the iobuf. The iobref should already have +- * one after iobref_add, so when we unref that we'll free the iobuf as +- * well. This allows us to pass just the iobref as frame->local. 
+- */ +- iobuf_unref(iobuf); +- /* Set the pointer to null so we don't free it on a later error. */ +- iobuf = NULL; +- +- /* Create the xdr payload */ +- ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req); +- if (ret == -1) { +- goto *errlbl; +- } +- iov.iov_len = ret; +- +- /* Send the msg */ +- GF_ATOMIC_INC(conf->blockers); +- ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0, +- iobref, frame, NULL, 0, NULL, 0, NULL); +- GF_FREE(volfile_content); +- if (spec_fd >= 0) +- sys_close(spec_fd); +- return ret; +- +-free_iobref: +- iobref_unref(iobref); +-maybe_free_iobuf: +- if (iobuf) { +- iobuf_unref(iobuf); +- } +-err: +- GF_FREE(volfile_content); +- if (spec_fd >= 0) +- sys_close(spec_fd); +- if (frame) +- STACK_DESTROY(frame->root); +- return -1; +-} +- +-int +-glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags) +-{ +- glusterd_conf_t *conf = THIS->private; +- int ret = -1; +- int tries; +- rpc_clnt_t *rpc = NULL; +- +- GF_VALIDATE_OR_GOTO("glusterd", conf, out); +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- GF_VALIDATE_OR_GOTO("glusterd", volinfo, out); +- +- gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO, +- "adding svc %s (volume=%s) to existing " +- "process with pid %d", +- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- +- rpc = rpc_clnt_ref(svc->conn.rpc); +- for (tries = 15; tries > 0; --tries) { +- if (rpc) { +- pthread_mutex_lock(&conf->attach_lock); +- { +- ret = __glusterd_send_svc_configure_req( +- svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH); +- } +- pthread_mutex_unlock(&conf->attach_lock); +- if (!ret) { +- volinfo->shd.attached = _gf_true; +- goto out; +- } +- } +- /* +- * It might not actually be safe to manipulate the lock +- * like this, but if we don't then the connection can +- * never actually complete and retries are useless. +- * Unfortunately, all of the alternatives (e.g. doing +- * all of this in a separate thread) are much more +- * complicated and risky. +- * TBD: see if there's a better way +- */ +- synclock_unlock(&conf->big_lock); +- sleep(1); +- synclock_lock(&conf->big_lock); +- } +- ret = -1; +- gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL, +- "attach failed for %s(volume=%s)", svc->name, volinfo->volname); +-out: +- if (rpc) +- rpc_clnt_unref(rpc); +- return ret; +-} +- +-int +-glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig) +-{ +- glusterd_conf_t *conf = THIS->private; +- int ret = -1; +- int tries; +- rpc_clnt_t *rpc = NULL; +- +- GF_VALIDATE_OR_GOTO(THIS->name, conf, out); +- GF_VALIDATE_OR_GOTO(THIS->name, svc, out); +- GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out); +- +- gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO, +- "removing svc %s (volume=%s) from existing " +- "process with pid %d", +- svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc)); +- +- rpc = rpc_clnt_ref(svc->conn.rpc); +- for (tries = 15; tries > 0; --tries) { +- if (rpc) { +- /*For detach there is no flags, and we are not using sig.*/ +- pthread_mutex_lock(&conf->attach_lock); +- { +- ret = __glusterd_send_svc_configure_req(svc, 0, svc->conn.rpc, +- svc->proc.volfileid, +- GLUSTERD_SVC_DETACH); +- } +- pthread_mutex_unlock(&conf->attach_lock); +- if (!ret) { +- goto out; +- } +- } +- /* +- * It might not actually be safe to manipulate the lock +- * like this, but if we don't then the connection can +- * never actually complete and retries are useless. 
+- * Unfortunately, all of the alternatives (e.g. doing +- * all of this in a separate thread) are much more +- * complicated and risky. +- * TBD: see if there's a better way +- */ +- synclock_unlock(&conf->big_lock); +- sleep(1); +- synclock_lock(&conf->big_lock); +- } +- ret = -1; +- gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL, +- "detach failed for %s(volume=%s)", svc->name, volinfo->volname); +-out: +- if (rpc) +- rpc_clnt_unref(rpc); +- return ret; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +index 5def246..cc98e78 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h +@@ -16,10 +16,10 @@ + #include "glusterd-volgen.h" + + int +-glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo); ++glusterd_svcs_reconfigure(); + + int +-glusterd_svcs_stop(glusterd_volinfo_t *vol); ++glusterd_svcs_stop(); + + int + glusterd_svcs_manager(glusterd_volinfo_t *volinfo); +@@ -41,41 +41,5 @@ int + glusterd_svc_check_tier_topology_identical(char *svc_name, + glusterd_volinfo_t *volinfo, + gf_boolean_t *identical); +-int +-glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict, +- glusterd_volinfo_t *volinfo, +- glusterd_vol_graph_builder_t, +- gf_boolean_t *identical); +-int +-glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict, +- glusterd_volinfo_t *volinfo, +- glusterd_vol_graph_builder_t, +- gf_boolean_t *identical); +-void +-glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, +- char *volfile, size_t len); +-void * +-__gf_find_compatible_svc(gd_node_type daemon); +- +-glusterd_svc_proc_t * +-glusterd_svcprocess_new(); +- +-int +-glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc); +- +-void * +-__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid); +- +-int +-glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, +- int flags); +- +-int +-glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig); +- +-int +-__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag, +- struct rpc_clnt *rpc, char *volfile_id, +- int op); + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +index f32dafc..4cd4cea 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c +@@ -18,7 +18,6 @@ + #include "glusterd-conn-mgmt.h" + #include "glusterd-messages.h" + #include <glusterfs/syscall.h> +-#include "glusterd-shd-svc-helper.h" + + int + glusterd_svc_create_rundir(char *rundir) +@@ -168,75 +167,68 @@ glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline) + GF_ASSERT(this); + + priv = this->private; +- GF_VALIDATE_OR_GOTO("glusterd", priv, out); +- GF_VALIDATE_OR_GOTO("glusterd", svc, out); +- +- pthread_mutex_lock(&priv->attach_lock); +- { +- if (glusterd_proc_is_running(&(svc->proc))) { +- ret = 0; +- goto unlock; +- } ++ GF_ASSERT(priv); + +- ret = sys_access(svc->proc.volfile, F_OK); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, +- "Volfile %s is not present", svc->proc.volfile); +- goto unlock; +- } ++ if (glusterd_proc_is_running(&(svc->proc))) { ++ ret = 0; ++ goto out; ++ } + +- runinit(&runner); ++ ret = sys_access(svc->proc.volfile, F_OK); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND, ++ "Volfile %s is not present", 
svc->proc.volfile); ++ goto out; ++ } + +- if (this->ctx->cmd_args.valgrind) { +- len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", +- svc->proc.logfile, svc->name); +- if ((len < 0) || (len >= PATH_MAX)) { +- ret = -1; +- goto unlock; +- } ++ runinit(&runner); + +- runner_add_args(&runner, "valgrind", "--leak-check=full", +- "--trace-children=yes", "--track-origins=yes", +- NULL); +- runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); ++ if (this->ctx->cmd_args.valgrind) { ++ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log", ++ svc->proc.logfile, svc->name); ++ if ((len < 0) || (len >= PATH_MAX)) { ++ ret = -1; ++ goto out; + } + +- runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", +- svc->proc.volfileserver, "--volfile-id", +- svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", +- svc->proc.logfile, "-S", svc->conn.sockpath, NULL); ++ runner_add_args(&runner, "valgrind", "--leak-check=full", ++ "--trace-children=yes", "--track-origins=yes", NULL); ++ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile); ++ } + +- if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, +- SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), +- &localtime_logging) == 0) { +- if (strcmp(localtime_logging, "enable") == 0) +- runner_add_arg(&runner, "--localtime-logging"); +- } +- if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, +- SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), +- &log_level) == 0) { +- snprintf(daemon_log_level, 30, "--log-level=%s", log_level); +- runner_add_arg(&runner, daemon_log_level); +- } ++ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", ++ svc->proc.volfileserver, "--volfile-id", ++ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l", ++ svc->proc.logfile, "-S", svc->conn.sockpath, NULL); ++ ++ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY, ++ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY), ++ &localtime_logging) == 0) { ++ if (strcmp(localtime_logging, "enable") == 0) ++ runner_add_arg(&runner, "--localtime-logging"); ++ } ++ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY, ++ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY), &log_level) == 0) { ++ snprintf(daemon_log_level, 30, "--log-level=%s", log_level); ++ runner_add_arg(&runner, daemon_log_level); ++ } + +- if (cmdline) +- dict_foreach(cmdline, svc_add_args, (void *)&runner); ++ if (cmdline) ++ dict_foreach(cmdline, svc_add_args, (void *)&runner); + +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, +- "Starting %s service", svc->name); ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_START_SUCCESS, ++ "Starting %s service", svc->name); + +- if (flags == PROC_START_NO_WAIT) { +- ret = runner_run_nowait(&runner); +- } else { +- synclock_unlock(&priv->big_lock); +- { +- ret = runner_run(&runner); +- } +- synclock_lock(&priv->big_lock); ++ if (flags == PROC_START_NO_WAIT) { ++ ret = runner_run_nowait(&runner); ++ } else { ++ synclock_unlock(&priv->big_lock); ++ { ++ ret = runner_run(&runner); + } ++ synclock_lock(&priv->big_lock); + } +-unlock: +- pthread_mutex_unlock(&priv->attach_lock); ++ + out: + gf_msg_debug(this->name, 0, "Returning %d", ret); + +@@ -289,8 +281,7 @@ glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile, + + glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir)); + +- if (!strcmp(server, "quotad")) +- /*quotad has different volfile name*/ ++ if (!strcmp(server, "quotad")) /*quotad has different volfile name*/ + snprintf(volfile, len, "%s/%s.vol", dir, server); + else + snprintf(volfile, len, "%s/%s-server.vol", 
dir, server); +@@ -375,138 +366,3 @@ glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event) + + return ret; + } +- +-void +-glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol, +- char *volfile, size_t len) +-{ +- GF_ASSERT(len == PATH_MAX); +- +- if (!strcmp(server, "glustershd")) { +- glusterd_svc_build_shd_volfile_path(vol, volfile, len); +- } +-} +- +-int +-glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc, +- rpc_clnt_event_t event) +-{ +- int ret = 0; +- glusterd_svc_t *svc = NULL; +- glusterd_svc_t *tmp = NULL; +- xlator_t *this = NULL; +- gf_boolean_t need_logging = _gf_false; +- +- this = THIS; +- GF_ASSERT(this); +- +- if (!mux_proc) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, +- "Failed to get the svc proc data"); +- return -1; +- } +- +- /* Currently this function was used for shd svc, if this function is +- * using for another svc, change ths glustershd reference. We can get +- * the svc name from any of the attached svc's +- */ +- switch (event) { +- case RPC_CLNT_CONNECT: +- gf_msg_debug(this->name, 0, +- "glustershd has connected with glusterd."); +- gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd"); +- cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc) +- { +- if (svc->online) +- continue; +- svc->online = _gf_true; +- } +- break; +- +- case RPC_CLNT_DISCONNECT: +- cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc) +- { +- if (svc->online) { +- if (!need_logging) +- need_logging = _gf_true; +- svc->online = _gf_false; +- } +- } +- if (need_logging) { +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED, +- "glustershd has disconnected from glusterd."); +- gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd"); +- } +- break; +- +- default: +- gf_msg_trace(this->name, 0, "got some other RPC event %d", event); +- break; +- } +- +- return ret; +-} +- +-int +-glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, +- char *sockpath, int frame_timeout, +- glusterd_muxsvc_conn_notify_t notify) +-{ +- int ret = -1; +- dict_t *options = NULL; +- struct rpc_clnt *rpc = NULL; +- xlator_t *this = THIS; +- glusterd_svc_t *svc = NULL; +- +- options = dict_new(); +- if (!this || !options) +- goto out; +- +- svc = cds_list_entry(conn, glusterd_svc_t, conn); +- if (!svc) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL, +- "Failed to get the service"); +- goto out; +- } +- +- ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout); +- if (ret) +- goto out; +- +- ret = dict_set_int32n(options, "transport.socket.ignore-enoent", +- SLEN("transport.socket.ignore-enoent"), 1); +- if (ret) +- goto out; +- +- /* @options is free'd by rpc_transport when destroyed */ +- rpc = rpc_clnt_new(options, this, (char *)svc->name, 16); +- if (!rpc) { +- ret = -1; +- goto out; +- } +- +- ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify, +- mux_proc); +- if (ret) +- goto out; +- +- ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath); +- if (ret < 0) +- goto out; +- else +- ret = 0; +- +- conn->frame_timeout = frame_timeout; +- conn->rpc = rpc; +- mux_proc->notify = notify; +-out: +- if (options) +- dict_unref(options); +- if (ret) { +- if (rpc) { +- rpc_clnt_unref(rpc); +- rpc = NULL; +- } +- } +- return ret; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +index fbc5225..c850bfd 100644 +--- 
a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h +@@ -13,12 +13,9 @@ + + #include "glusterd-proc-mgmt.h" + #include "glusterd-conn-mgmt.h" +-#include "glusterd-rcu.h" + + struct glusterd_svc_; +- + typedef struct glusterd_svc_ glusterd_svc_t; +-typedef struct glusterd_svc_proc_ glusterd_svc_proc_t; + + typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc); + +@@ -28,17 +25,6 @@ typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags); + typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig); + typedef int (*glusterd_svc_reconfigure_t)(void *data); + +-typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc, +- rpc_clnt_event_t event); +- +-struct glusterd_svc_proc_ { +- struct cds_list_head svc_proc_list; +- struct cds_list_head svcs; +- glusterd_muxsvc_conn_notify_t notify; +- rpc_clnt_t *rpc; +- void *data; +-}; +- + struct glusterd_svc_ { + char name[NAME_MAX]; + glusterd_conn_t conn; +@@ -49,8 +35,6 @@ struct glusterd_svc_ { + gf_boolean_t online; + gf_boolean_t inited; + glusterd_svc_reconfigure_t reconfigure; +- glusterd_svc_proc_t *svc_proc; +- struct cds_list_head mux_svc; + }; + + int +@@ -85,15 +69,4 @@ glusterd_svc_reconfigure(int (*create_volfile)()); + int + glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event); + +-int +-glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn, +- rpc_clnt_event_t event); +- +-int +-glusterd_proc_get_pid(glusterd_proc_t *proc); +- +-int +-glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc, +- char *sockpath, int frame_timeout, +- glusterd_muxsvc_conn_notify_t notify); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-tier.c b/xlators/mgmt/glusterd/src/glusterd-tier.c +index 23a9592..4dc0d44 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tier.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tier.c +@@ -27,7 +27,6 @@ + #include "glusterd-messages.h" + #include "glusterd-mgmt.h" + #include "glusterd-syncop.h" +-#include "glusterd-shd-svc-helper.h" + + #include <sys/wait.h> + #include <dlfcn.h> +@@ -616,7 +615,7 @@ glusterd_op_remove_tier_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + if (cmd == GF_DEFRAG_CMD_DETACH_START && + volinfo->status == GLUSTERD_STATUS_STARTED) { +- ret = glusterd_svcs_reconfigure(volinfo); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL, + "Unable to reconfigure NFS-Server"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +index ab463f1..04ceec5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc.c +@@ -83,6 +83,7 @@ glusterd_tierdsvc_init(void *data) + goto out; + + notify = glusterd_svc_common_rpc_notify; ++ glusterd_store_perform_node_state_store(volinfo); + + volinfo->type = GF_CLUSTER_TYPE_TIER; + +@@ -394,7 +395,6 @@ int + glusterd_tierdsvc_restart() + { + glusterd_volinfo_t *volinfo = NULL; +- glusterd_volinfo_t *tmp = NULL; + int ret = 0; + xlator_t *this = THIS; + glusterd_conf_t *conf = NULL; +@@ -405,7 +405,7 @@ glusterd_tierdsvc_restart() + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + +- cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list) ++ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list) + { + /* Start per volume tierd svc */ + if (volinfo->status == GLUSTERD_STATUS_STARTED && +diff --git 
a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 4525ec7..2aa975b 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -61,7 +61,6 @@ + #include "glusterd-server-quorum.h" + #include <glusterfs/quota-common-utils.h> + #include <glusterfs/common-utils.h> +-#include "glusterd-shd-svc-helper.h" + + #include "xdr-generic.h" + #include <sys/resource.h> +@@ -625,17 +624,13 @@ glusterd_volinfo_t * + glusterd_volinfo_unref(glusterd_volinfo_t *volinfo) + { + int refcnt = -1; +- glusterd_conf_t *conf = THIS->private; + +- pthread_mutex_lock(&conf->volume_lock); ++ pthread_mutex_lock(&volinfo->reflock); + { +- pthread_mutex_lock(&volinfo->reflock); +- { +- refcnt = --volinfo->refcnt; +- } +- pthread_mutex_unlock(&volinfo->reflock); ++ refcnt = --volinfo->refcnt; + } +- pthread_mutex_unlock(&conf->volume_lock); ++ pthread_mutex_unlock(&volinfo->reflock); ++ + if (!refcnt) { + glusterd_volinfo_delete(volinfo); + return NULL; +@@ -707,7 +702,6 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo) + glusterd_snapdsvc_build(&new_volinfo->snapd.svc); + glusterd_tierdsvc_build(&new_volinfo->tierd.svc); + glusterd_gfproxydsvc_build(&new_volinfo->gfproxyd.svc); +- glusterd_shdsvc_build(&new_volinfo->shd.svc); + + pthread_mutex_init(&new_volinfo->reflock, NULL); + *volinfo = glusterd_volinfo_ref(new_volinfo); +@@ -1073,11 +1067,11 @@ glusterd_volinfo_delete(glusterd_volinfo_t *volinfo) + gf_store_handle_destroy(volinfo->snapd.handle); + + glusterd_auth_cleanup(volinfo); +- glusterd_shd_svcproc_cleanup(&volinfo->shd); + + pthread_mutex_destroy(&volinfo->reflock); + GF_FREE(volinfo); + ret = 0; ++ + out: + gf_msg_debug(THIS->name, 0, "Returning %d", ret); + return ret; +@@ -3929,7 +3923,6 @@ glusterd_spawn_daemons(void *opaque) + ret = glusterd_snapdsvc_restart(); + ret = glusterd_tierdsvc_restart(); + ret = glusterd_gfproxydsvc_restart(); +- ret = glusterd_shdsvc_restart(); + return ret; + } + +@@ -4880,9 +4873,6 @@ glusterd_delete_stale_volume(glusterd_volinfo_t *stale_volinfo, + svc = &(stale_volinfo->snapd.svc); + (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); + } +- svc = &(stale_volinfo->shd.svc); +- (void)svc->manager(svc, stale_volinfo, PROC_START_NO_WAIT); +- + (void)glusterd_volinfo_remove(stale_volinfo); + + return 0; +@@ -4997,15 +4987,6 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + glusterd_volinfo_unref(old_volinfo); + } + +- ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, +- "Failed to store " +- "volinfo for volume %s", +- new_volinfo->volname); +- goto out; +- } +- + if (glusterd_is_volume_started(new_volinfo)) { + (void)glusterd_start_bricks(new_volinfo); + if (glusterd_is_snapd_enabled(new_volinfo)) { +@@ -5014,10 +4995,15 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); + } + } +- svc = &(new_volinfo->shd.svc); +- if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { +- gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); +- } ++ } ++ ++ ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, ++ "Failed to store " ++ "volinfo for volume %s", ++ new_volinfo->volname); ++ goto out; + } + + ret = glusterd_create_volfiles_and_notify_services(new_volinfo); +@@ -5521,7 
+5507,9 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + glusterd_svc_build_pidfile_path(server, priv->rundir, pidfile, + sizeof(pidfile)); + +- if (strcmp(server, priv->nfs_svc.name) == 0) ++ if (strcmp(server, priv->shd_svc.name) == 0) ++ svc = &(priv->shd_svc); ++ else if (strcmp(server, priv->nfs_svc.name) == 0) + svc = &(priv->nfs_svc); + else if (strcmp(server, priv->quotad_svc.name) == 0) + svc = &(priv->quotad_svc); +@@ -5552,6 +5540,9 @@ glusterd_add_node_to_dict(char *server, dict_t *dict, int count, + if (!strcmp(server, priv->nfs_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "NFS Server", + SLEN("NFS Server")); ++ else if (!strcmp(server, priv->shd_svc.name)) ++ ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", ++ SLEN("Self-heal Daemon")); + else if (!strcmp(server, priv->quotad_svc.name)) + ret = dict_set_nstrn(dict, key, keylen, "Quota Daemon", + SLEN("Quota Daemon")); +@@ -9115,21 +9106,6 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid) + "to stop snapd daemon service"); + } + } +- +- if (glusterd_is_shd_compatible_volume(volinfo)) { +- /* +- * Sending stop request for all volumes. So it is fine +- * to send stop for mux shd +- */ +- svc = &(volinfo->shd.svc); +- ret = svc->stop(svc, SIGTERM); +- if (ret) { +- gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, +- "Failed " +- "to stop shd daemon service"); +- } +- } +- + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + svc = &(volinfo->tierd.svc); + ret = svc->stop(svc, SIGTERM); +@@ -9155,7 +9131,7 @@ glusterd_friend_remove_cleanup_vols(uuid_t uuid) + } + + /* Reconfigure all daemon services upon peer detach */ +- ret = glusterd_svcs_reconfigure(NULL); ++ ret = glusterd_svcs_reconfigure(); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL, + "Failed to reconfigure all daemon services."); +@@ -14746,74 +14722,3 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo) + return _gf_true; + return _gf_false; + } +- +-int32_t +-glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, +- int32_t count) +-{ +- int ret = -1; +- int32_t pid = -1; +- int32_t brick_online = -1; +- char key[64] = {0}; +- int keylen; +- char *pidfile = NULL; +- xlator_t *this = NULL; +- char *uuid_str = NULL; +- +- this = THIS; +- GF_VALIDATE_OR_GOTO(THIS->name, this, out); +- +- GF_VALIDATE_OR_GOTO(this->name, volinfo, out); +- GF_VALIDATE_OR_GOTO(this->name, dict, out); +- +- keylen = snprintf(key, sizeof(key), "brick%d.hostname", count); +- ret = dict_set_nstrn(dict, key, keylen, "Self-heal Daemon", +- SLEN("Self-heal Daemon")); +- if (ret) +- goto out; +- +- keylen = snprintf(key, sizeof(key), "brick%d.path", count); +- uuid_str = gf_strdup(uuid_utoa(MY_UUID)); +- if (!uuid_str) { +- ret = -1; +- goto out; +- } +- ret = dict_set_dynstrn(dict, key, keylen, uuid_str); +- if (ret) +- goto out; +- uuid_str = NULL; +- +- /* shd doesn't have a port. but the cli needs a port key with +- * a zero value to parse. 
+- * */ +- +- keylen = snprintf(key, sizeof(key), "brick%d.port", count); +- ret = dict_set_int32n(dict, key, keylen, 0); +- if (ret) +- goto out; +- +- pidfile = volinfo->shd.svc.proc.pidfile; +- +- brick_online = gf_is_service_running(pidfile, &pid); +- +- /* If shd is not running, then don't print the pid */ +- if (!brick_online) +- pid = -1; +- keylen = snprintf(key, sizeof(key), "brick%d.pid", count); +- ret = dict_set_int32n(dict, key, keylen, pid); +- if (ret) +- goto out; +- +- keylen = snprintf(key, sizeof(key), "brick%d.status", count); +- ret = dict_set_int32n(dict, key, keylen, brick_online); +- +-out: +- if (uuid_str) +- GF_FREE(uuid_str); +- if (ret) +- gf_msg(this ? this->name : "glusterd", GF_LOG_ERROR, 0, +- GD_MSG_DICT_SET_FAILED, +- "Returning %d. adding values to dict failed", ret); +- +- return ret; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 5c6a453..ead16b2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -881,8 +881,4 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo); + + char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); +- +-int32_t +-glusterd_add_shd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, +- int32_t count); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 8b58d40..5e0214e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -36,7 +36,6 @@ + #include "glusterd-svc-mgmt.h" + #include "glusterd-svc-helper.h" + #include "glusterd-snapd-svc-helper.h" +-#include "glusterd-shd-svc-helper.h" + #include "glusterd-gfproxyd-svc-helper.h" + + struct gd_validate_reconf_opts { +@@ -4865,7 +4864,7 @@ volgen_get_shd_key(int type) + static int + volgen_set_shd_key_enable(dict_t *set_dict, const int type) + { +- int ret = 0; ++ int ret = -1; + + switch (type) { + case GF_CLUSTER_TYPE_REPLICATE: +@@ -5156,15 +5155,24 @@ out: + static int + build_shd_volume_graph(xlator_t *this, volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, dict_t *mod_dict, +- dict_t *set_dict, gf_boolean_t graph_check) ++ dict_t *set_dict, gf_boolean_t graph_check, ++ gf_boolean_t *valid_config) + { + volgen_graph_t cgraph = {0}; + int ret = 0; + int clusters = -1; + ++ if (!graph_check && (volinfo->status != GLUSTERD_STATUS_STARTED)) ++ goto out; ++ + if (!glusterd_is_shd_compatible_volume(volinfo)) + goto out; + ++ /* Shd graph is valid only when there is at least one ++ * replica/disperse volume is present ++ */ ++ *valid_config = _gf_true; ++ + ret = prepare_shd_volume_options(volinfo, mod_dict, set_dict); + if (ret) + goto out; +@@ -5194,16 +5202,19 @@ out: + } + + int +-build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, +- dict_t *mod_dict) ++build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict) + { ++ glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + int ret = 0; ++ gf_boolean_t valid_config = _gf_false; + xlator_t *iostxl = NULL; + gf_boolean_t graph_check = _gf_false; + + this = THIS; ++ priv = this->private; + + set_dict = dict_new(); + if (!set_dict) { +@@ -5213,18 +5224,26 @@ build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, + + if (mod_dict) + graph_check = dict_get_str_boolean(mod_dict, "graph-check", 0); +- iostxl = volgen_graph_add_as(graph, "debug/io-stats", volinfo->volname); ++ iostxl 
= volgen_graph_add_as(graph, "debug/io-stats", "glustershd"); + if (!iostxl) { + ret = -1; + goto out; + } + +- ret = build_shd_volume_graph(this, graph, volinfo, mod_dict, set_dict, +- graph_check); ++ cds_list_for_each_entry(voliter, &priv->volumes, vol_list) ++ { ++ ret = build_shd_volume_graph(this, graph, voliter, mod_dict, set_dict, ++ graph_check, &valid_config); ++ ret = dict_reset(set_dict); ++ if (ret) ++ goto out; ++ } + + out: + if (set_dict) + dict_unref(set_dict); ++ if (!valid_config) ++ ret = -EINVAL; + return ret; + } + +@@ -6541,10 +6560,6 @@ glusterd_create_volfiles(glusterd_volinfo_t *volinfo) + if (ret) + gf_log(this->name, GF_LOG_ERROR, "Could not generate gfproxy volfiles"); + +- ret = glusterd_shdsvc_create_volfile(volinfo); +- if (ret) +- gf_log(this->name, GF_LOG_ERROR, "Could not generate shd volfiles"); +- + dict_del_sizen(volinfo->dict, "skip-CLIOT"); + + out: +@@ -6625,7 +6640,7 @@ validate_shdopts(glusterd_volinfo_t *volinfo, dict_t *val_dict, + ret = dict_set_int32_sizen(val_dict, "graph-check", 1); + if (ret) + goto out; +- ret = build_shd_graph(volinfo, &graph, val_dict); ++ ret = build_shd_graph(&graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt(&graph.graph, op_errstr); + +@@ -7002,22 +7017,3 @@ gd_is_boolean_option(char *key) + + return _gf_false; + } +- +-int +-glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, +- dict_t *mode_dict) +-{ +- int ret = -1; +- volgen_graph_t graph = { +- 0, +- }; +- +- graph.type = GF_SHD; +- ret = build_shd_graph(volinfo, &graph, mode_dict); +- if (!ret) +- ret = volgen_write_volfile(&graph, filename); +- +- volgen_graph_free(&graph); +- +- return ret; +-} +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h +index 897d8fa..f9fc068 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h +@@ -66,7 +66,6 @@ typedef enum { + GF_REBALANCED = 1, + GF_QUOTAD, + GF_SNAPD, +- GF_SHD, + } glusterd_graph_type_t; + + struct volgen_graph { +@@ -78,8 +77,6 @@ typedef struct volgen_graph volgen_graph_t; + + typedef int (*glusterd_graph_builder_t)(volgen_graph_t *graph, + dict_t *mod_dict); +-typedef int (*glusterd_vol_graph_builder_t)(glusterd_volinfo_t *, +- char *filename, dict_t *mod_dict); + + #define COMPLETE_OPTION(key, completion, ret) \ + do { \ +@@ -204,8 +201,7 @@ void + glusterd_get_shd_filepath(char *filename); + + int +-build_shd_graph(glusterd_volinfo_t *volinfo, volgen_graph_t *graph, +- dict_t *mod_dict); ++build_shd_graph(volgen_graph_t *graph, dict_t *mod_dict); + + int + build_nfs_graph(volgen_graph_t *graph, dict_t *mod_dict); +@@ -317,9 +313,4 @@ glusterd_generate_gfproxyd_volfile(glusterd_volinfo_t *volinfo); + + int + glusterd_build_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *filename); +- +-int +-glusterd_shdsvc_generate_volfile(glusterd_volinfo_t *volinfo, char *filename, +- dict_t *mode_dict); +- + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 4c3ad50..1ea8ba6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1940,7 +1940,7 @@ static int + glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, char **op_errstr) + { +- glusterd_svc_t *svc = NULL; ++ glusterd_conf_t *priv = NULL; + gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; + int ret = 0; + char msg[2408] = { +@@ -1950,6 +1950,7 @@ 
glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + "Self-heal daemon is not running. " + "Check self-heal daemon log file."; + ++ priv = this->private; + ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"), + (int32_t *)&heal_op); + if (ret) { +@@ -1958,7 +1959,6 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- svc = &(volinfo->shd.svc); + switch (heal_op) { + case GF_SHD_OP_INVALID: + case GF_SHD_OP_HEAL_ENABLE: /* This op should be handled in volume-set*/ +@@ -1988,7 +1988,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- if (!svc->online) { ++ if (!priv->shd_svc.online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; +@@ -2009,7 +2009,7 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + goto out; + } + +- if (!svc->online) { ++ if (!priv->shd_svc.online) { + ret = -1; + *op_errstr = gf_strdup(offline_msg); + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index c0973cb..d360312 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1537,6 +1537,14 @@ init(xlator_t *this) + exit(1); + } + ++ ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_GLUSTERSHD_RUN_DIR); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, ++ "Unable to create " ++ "glustershd running directory"); ++ exit(1); ++ } ++ + ret = glusterd_init_var_run_dirs(this, rundir, GLUSTERD_NFS_RUN_DIR); + if (ret) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CREATE_DIR_FAILED, +@@ -1811,9 +1819,6 @@ init(xlator_t *this) + CDS_INIT_LIST_HEAD(&conf->snapshots); + CDS_INIT_LIST_HEAD(&conf->missed_snaps_list); + CDS_INIT_LIST_HEAD(&conf->brick_procs); +- CDS_INIT_LIST_HEAD(&conf->shd_procs); +- pthread_mutex_init(&conf->attach_lock, NULL); +- pthread_mutex_init(&conf->volume_lock, NULL); + + pthread_mutex_init(&conf->mutex, NULL); + conf->rpc = rpc; +@@ -1894,6 +1899,7 @@ init(xlator_t *this) + glusterd_mgmt_v3_lock_timer_init(); + glusterd_txn_opinfo_dict_init(); + ++ glusterd_shdsvc_build(&conf->shd_svc); + glusterd_nfssvc_build(&conf->nfs_svc); + glusterd_quotadsvc_build(&conf->quotad_svc); + glusterd_bitdsvc_build(&conf->bitd_svc); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 0fbc9dd..2be005c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -28,7 +28,6 @@ + #include "glusterd-sm.h" + #include "glusterd-snapd-svc.h" + #include "glusterd-tierd-svc.h" +-#include "glusterd-shd-svc.h" + #include "glusterd-bitd-svc.h" + #include "glusterd1-xdr.h" + #include "protocol-common.h" +@@ -173,6 +172,7 @@ typedef struct { + char workdir[VALID_GLUSTERD_PATHMAX]; + char rundir[VALID_GLUSTERD_PATHMAX]; + rpcsvc_t *rpc; ++ glusterd_svc_t shd_svc; + glusterd_svc_t nfs_svc; + glusterd_svc_t bitd_svc; + glusterd_svc_t scrub_svc; +@@ -181,7 +181,6 @@ typedef struct { + struct cds_list_head volumes; + struct cds_list_head snapshots; /*List of snap volumes */ + struct cds_list_head brick_procs; /* List of brick processes */ +- struct cds_list_head shd_procs; /* List of shd processes */ + pthread_mutex_t xprt_lock; + struct list_head xprt_list; + pthread_mutex_t import_volumes; +@@ -222,11 +221,6 @@ typedef struct { + gf_atomic_t blockers; + uint32_t mgmt_v3_lock_timeout; + gf_boolean_t restart_bricks; +- pthread_mutex_t attach_lock; /* Lock can be per process or a 
common one */ +- pthread_mutex_t volume_lock; /* We release the big_lock from lot of places +- which might lead the modification of volinfo +- list. +- */ + gf_atomic_t thread_count; + } glusterd_conf_t; + +@@ -519,7 +513,6 @@ struct glusterd_volinfo_ { + + glusterd_snapdsvc_t snapd; + glusterd_tierdsvc_t tierd; +- glusterd_shdsvc_t shd; + glusterd_gfproxydsvc_t gfproxyd; + int32_t quota_xattr_version; + gf_boolean_t stage_deleted; /* volume has passed staging +@@ -646,6 +639,7 @@ typedef enum { + #define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" + #define GLUSTERD_BITD_RUN_DIR "/bitd" + #define GLUSTERD_SCRUB_RUN_DIR "/scrub" ++#define GLUSTERD_GLUSTERSHD_RUN_DIR "/glustershd" + #define GLUSTERD_NFS_RUN_DIR "/nfs" + #define GLUSTERD_QUOTAD_RUN_DIR "/quotad" + #define GLUSTER_SHARED_STORAGE_BRICK_DIR GLUSTERD_DEFAULT_WORKDIR "/ss_brick" +@@ -701,26 +695,6 @@ typedef ssize_t (*gd_serialize_t)(struct iovec outmsg, void *args); + } \ + } while (0) + +-#define GLUSTERD_GET_SHD_RUNDIR(path, volinfo, priv) \ +- do { \ +- int32_t _shd_dir_len; \ +- _shd_dir_len = snprintf(path, PATH_MAX, "%s/shd/%s", priv->rundir, \ +- volinfo->volname); \ +- if ((_shd_dir_len < 0) || (_shd_dir_len >= PATH_MAX)) { \ +- path[0] = 0; \ +- } \ +- } while (0) +- +-#define GLUSTERD_GET_SHD_PID_FILE(path, volinfo, priv) \ +- do { \ +- int32_t _shd_pid_len; \ +- _shd_pid_len = snprintf(path, PATH_MAX, "%s/shd/%s-shd.pid", \ +- priv->rundir, volinfo->volname); \ +- if ((_shd_pid_len < 0) || (_shd_pid_len >= PATH_MAX)) { \ +- path[0] = 0; \ +- } \ +- } while (0) +- + #define GLUSTERD_GET_VOLUME_PID_DIR(path, volinfo, priv) \ + do { \ + int32_t _vol_pid_len; \ +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index 532ef35..e156d4d 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -46,6 +46,7 @@ client_fini_complete(xlator_t *this) + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + + clnt_conf_t *conf = this->private; ++ + if (!conf->destroy) + return 0; + +@@ -68,11 +69,6 @@ client_notify_dispatch_uniq(xlator_t *this, int32_t event, void *data, ...) + return 0; + + return client_notify_dispatch(this, event, data); +- +- /* Please avoid any code that access xlator object here +- * Because for a child down event, once we do the signal +- * we will start cleanup. +- */ + } + + int +@@ -109,11 +105,6 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...) + } + pthread_mutex_unlock(&ctx->notify_lock); + +- /* Please avoid any code that access xlator object here +- * Because for a child down event, once we do the signal +- * we will start cleanup. +- */ +- + return ret; + } + +@@ -2287,7 +2278,6 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + { + xlator_t *this = NULL; + clnt_conf_t *conf = NULL; +- gf_boolean_t is_parent_down = _gf_false; + int ret = 0; + + this = mydata; +@@ -2351,19 +2341,6 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + if (conf->portmap_err_logged) + conf->disconnect_err_logged = 1; + } +- /* +- * Once we complete the child down notification, +- * There is a chance that the graph might get freed, +- * So it is not safe to access any xlator contens +- * So here we are checking whether the parent is down +- * or not. 
+- */ +- pthread_mutex_lock(&conf->lock); +- { +- is_parent_down = conf->parent_down; +- } +- pthread_mutex_unlock(&conf->lock); +- + /* If the CHILD_DOWN event goes to parent xlator + multiple times, the logic of parent xlator notify + may get screwed up.. (eg. CHILD_MODIFIED event in +@@ -2371,12 +2348,6 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + to parent are genuine */ + ret = client_notify_dispatch_uniq(this, GF_EVENT_CHILD_DOWN, + NULL); +- if (is_parent_down) { +- /* If parent is down, then there should not be any +- * operation after a child down. +- */ +- goto out; +- } + if (ret) + gf_msg(this->name, GF_LOG_INFO, 0, + PC_MSG_CHILD_DOWN_NOTIFY_FAILED, +-- +1.8.3.1 + diff --git a/SOURCES/0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch b/SOURCES/0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch new file mode 100644 index 0000000..028f92c --- /dev/null +++ b/SOURCES/0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch @@ -0,0 +1,57 @@ +From 7a04fb9999f5d25c17f5593eed5e98d0f5a1932d Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Mon, 15 Jul 2019 14:30:52 +0530 +Subject: [PATCH 251/255] tests: Fix + bug-1717819-metadata-split-brain-detection.t failure + +<Backport of: https://review.gluster.org/#/c/glusterfs/+/23043/> + +Problem: +tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t fails +intermittently in test cases #49 & #50, which compare the values of the +user set xattr values after enabling the heal. We are not waiting for +the heal to complete before comparing those values, which might lead +those tests to fail. + +Fix: +Wait till the HEAL-TIMEOUT before comparing the xattr values. +Also cheking for the shd to come up and the bricks to connect to the shd +process in another case. 
+ +Change-Id: I0021c2d5d251111c695e2bf18c63e8189e456114 +fixes: bz#1704562 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176071 +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t +index 94b8bf3..76d1f21 100644 +--- a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t ++++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t +@@ -76,6 +76,10 @@ EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 + + # Launch heal + TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 + TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +@@ -117,6 +121,8 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status + EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file) + B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file) +-- +1.8.3.1 + diff --git a/SOURCES/0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch b/SOURCES/0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch new file mode 100644 index 0000000..b722dff --- /dev/null +++ b/SOURCES/0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch @@ -0,0 +1,63 @@ +From 5a35a996257d6aaa7fa55ff1e1aac407dd4824fe Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Fri, 12 Jul 2019 16:28:04 +0530 +Subject: [PATCH 252/255] glusterd: do not mark skip_locking as true for + geo-rep operations + +We need to send the commit req to peers in case of geo-rep +operations even though it is a no volname operation. In commit +phase peers try to set the txn_opinfo which will fail because +it is a no volname operation where we don't require a commit +phase. We mark skip_locking as true for no volname operations, +but we have to give an exception to geo-rep operations, so that +they can set txn_opinfo in commit phase. 
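+
+For illustration only, the intended rule condenses to the sketch below. The
+enum values and the is_no_volname_op() helper are placeholders, not glusterd
+symbols; the real check lives in __glusterd_handle_stage_op(), shown in the
+hunk further down:
+
+    #include <stdbool.h>
+
+    typedef enum { OP_GSYNC_SET, OP_SOME_OTHER } mgmt_op_t; /* placeholders */
+
+    static bool is_no_volname_op(mgmt_op_t op)
+    {
+        (void)op;
+        return true; /* placeholder: treat every op here as no-volname */
+    }
+
+    /* Every no-volname transaction skips locking except geo-rep, which
+     * must keep its txn_opinfo alive into the commit phase. */
+    static bool should_skip_locking(mgmt_op_t op)
+    {
+        return is_no_volname_op(op) && op != OP_GSYNC_SET;
+    }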
+ +Please refer to detailed RCA at the bug: 1729463 + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23034/ + +>fixes: bz#1729463 +>Change-Id: I9f2478b12a281f6e052035c0563c40543493a3fc +>Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +Change-Id: I9f2478b12a281f6e052035c0563c40543493a3fc +BUG: 1727785 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176032 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Shwetha Acharya <sacharya@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index cb2666b..2e73c98 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -1078,7 +1078,11 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req) + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking +- * phase where the transaction opinfos are created, won't be called. */ ++ * phase where the transaction opinfos are created, won't be called. ++ * skip_locking will be true for all such transaction and we clear ++ * the txn_opinfo after the staging phase, except for geo-replication ++ * operations where we need to access txn_opinfo in the later phases also. ++ */ + ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg_debug(this->name, 0, "No transaction's opinfo set"); +@@ -1087,7 +1091,8 @@ __glusterd_handle_stage_op(rpcsvc_request_t *req) + glusterd_txn_opinfo_init(&txn_op_info, &state, &op_req.op, + req_ctx->dict, req); + +- txn_op_info.skip_locking = _gf_true; ++ if (req_ctx->op != GD_OP_GSYNC_SET) ++ txn_op_info.skip_locking = _gf_true; + ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL, +-- +1.8.3.1 + diff --git a/SOURCES/0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch b/SOURCES/0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch new file mode 100644 index 0000000..d313482 --- /dev/null +++ b/SOURCES/0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch @@ -0,0 +1,246 @@ +From ea7f11b989896d76b8d091d26bc0241bce9413f8 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Thu, 4 Jul 2019 13:21:33 +0200 +Subject: [PATCH 253/255] core: fix deadlock between statedump and + fd_anonymous() + +There exists a deadlock between statedump generation and fd_anonymous() +function because they are acquiring inode table lock and inode lock in +reverse order. + +This patch modifies fd_anonymous() so that it takes inode lock only when +it's really necessary, avoiding the deadlock. 
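+
+For illustration, the problem reduces to the classic lock-order inversion
+sketched below; the lock and function names are placeholders, not the actual
+GlusterFS symbols:
+
+    #include <pthread.h>
+
+    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; /* table */
+    static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER; /* inode */
+
+    static void *statedump_path(void *arg)
+    {
+        pthread_mutex_lock(&table_lock); /* dump walks the table first... */
+        pthread_mutex_lock(&inode_lock); /* ...then locks each inode */
+        pthread_mutex_unlock(&inode_lock);
+        pthread_mutex_unlock(&table_lock);
+        return arg;
+    }
+
+    static void *anon_fd_path(void *arg)
+    {
+        pthread_mutex_lock(&inode_lock); /* old code: inode lock first... */
+        pthread_mutex_lock(&table_lock); /* ...then table, via inode_ref() */
+        pthread_mutex_unlock(&table_lock);
+        pthread_mutex_unlock(&inode_lock);
+        return arg;
+    }
+
+    int main(void)
+    {
+        pthread_t a, b;
+        pthread_create(&a, NULL, statedump_path, NULL);
+        pthread_create(&b, NULL, anon_fd_path, NULL);
+        pthread_join(a, NULL); /* may never return: each thread can block */
+        pthread_join(b, NULL); /* holding one mutex, waiting on the other */
+        return 0;
+    }
+
+Each thread can end up holding its first mutex while waiting forever for the
+other's. The fix breaks the cycle because fd_anonymous() no longer needs the
+table lock (taken inside inode_ref()) while the inode lock is held.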
+ +Upstream patch: +> Change-Id: I24355447f0ea1b39e2546782ad07f0512cc381e7 +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22995 +> BUG: 1727068 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: I24355447f0ea1b39e2546782ad07f0512cc381e7 +Fixes: bz#1722209 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176096 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/fd.c | 137 ++++++++++++++++++++++---------------------------- + 1 file changed, 61 insertions(+), 76 deletions(-) + +diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c +index b8aac72..314546a 100644 +--- a/libglusterfs/src/fd.c ++++ b/libglusterfs/src/fd.c +@@ -532,7 +532,7 @@ fd_unref(fd_t *fd) + return; + } + +-fd_t * ++static fd_t * + __fd_bind(fd_t *fd) + { + list_del_init(&fd->inode_list); +@@ -562,9 +562,9 @@ fd_bind(fd_t *fd) + } + + static fd_t * +-__fd_create(inode_t *inode, uint64_t pid) ++fd_allocate(inode_t *inode, uint64_t pid) + { +- fd_t *fd = NULL; ++ fd_t *fd; + + if (inode == NULL) { + gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +@@ -573,64 +573,67 @@ __fd_create(inode_t *inode, uint64_t pid) + } + + fd = mem_get0(inode->table->fd_mem_pool); +- if (!fd) +- goto out; ++ if (fd == NULL) { ++ return NULL; ++ } + + fd->xl_count = inode->table->xl->graph->xl_count + 1; + + fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count), + gf_common_mt_fd_ctx); +- if (!fd->_ctx) +- goto free_fd; ++ if (fd->_ctx == NULL) { ++ goto failed; ++ } + + fd->lk_ctx = fd_lk_ctx_create(); +- if (!fd->lk_ctx) +- goto free_fd_ctx; +- +- fd->inode = inode_ref(inode); +- fd->pid = pid; +- INIT_LIST_HEAD(&fd->inode_list); +- +- LOCK_INIT(&fd->lock); +-out: +- return fd; ++ if (fd->lk_ctx != NULL) { ++ /* We need to take a reference from the inode, but we cannot do it ++ * here because this function can be called with the inode lock taken ++ * and inode_ref() takes the inode's table lock. This is the reverse ++ * of the logical lock acquisition order and can cause a deadlock. So ++ * we simply assign the inode here and we delefate the inode reference ++ * responsibility to the caller (when this function succeeds and the ++ * inode lock is released). This is safe because the caller must hold ++ * a reference of the inode to use it, so it's guaranteed that the ++ * number of references won't reach 0 before the caller finishes. ++ * ++ * TODO: minimize use of locks in favor of atomic operations to avoid ++ * these dependencies. */ ++ fd->inode = inode; ++ fd->pid = pid; ++ INIT_LIST_HEAD(&fd->inode_list); ++ LOCK_INIT(&fd->lock); ++ GF_ATOMIC_INIT(fd->refcount, 1); ++ return fd; ++ } + +-free_fd_ctx: + GF_FREE(fd->_ctx); +-free_fd: ++ ++failed: + mem_put(fd); + + return NULL; + } + + fd_t * +-fd_create(inode_t *inode, pid_t pid) ++fd_create_uint64(inode_t *inode, uint64_t pid) + { +- fd_t *fd = NULL; +- +- fd = __fd_create(inode, (uint64_t)pid); +- if (!fd) +- goto out; ++ fd_t *fd; + +- fd = fd_ref(fd); ++ fd = fd_allocate(inode, pid); ++ if (fd != NULL) { ++ /* fd_allocate() doesn't get a reference from the inode. We need to ++ * take it here in case of success. 
*/ ++ inode_ref(inode); ++ } + +-out: + return fd; + } + + fd_t * +-fd_create_uint64(inode_t *inode, uint64_t pid) ++fd_create(inode_t *inode, pid_t pid) + { +- fd_t *fd = NULL; +- +- fd = __fd_create(inode, pid); +- if (!fd) +- goto out; +- +- fd = fd_ref(fd); +- +-out: +- return fd; ++ return fd_create_uint64(inode, (uint64_t)pid); + } + + static fd_t * +@@ -719,10 +722,13 @@ __fd_lookup_anonymous(inode_t *inode, int32_t flags) + return fd; + } + +-static fd_t * +-__fd_anonymous(inode_t *inode, int32_t flags) ++fd_t * ++fd_anonymous_with_flags(inode_t *inode, int32_t flags) + { + fd_t *fd = NULL; ++ bool ref = false; ++ ++ LOCK(&inode->lock); + + fd = __fd_lookup_anonymous(inode, flags); + +@@ -730,54 +736,33 @@ __fd_anonymous(inode_t *inode, int32_t flags) + __fd_lookup_anonymous(), so no need of one more fd_ref(). + if (!fd); then both create and bind won't bump up the ref + count, so we have to call fd_ref() after bind. */ +- if (!fd) { +- fd = __fd_create(inode, 0); +- +- if (!fd) +- return NULL; +- +- fd->anonymous = _gf_true; +- fd->flags = GF_ANON_FD_FLAGS | flags; ++ if (fd == NULL) { ++ fd = fd_allocate(inode, 0); ++ if (fd != NULL) { ++ fd->anonymous = _gf_true; ++ fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT); + +- __fd_bind(fd); ++ __fd_bind(fd); + +- __fd_ref(fd); ++ ref = true; ++ } + } + +- return fd; +-} +- +-fd_t * +-fd_anonymous(inode_t *inode) +-{ +- fd_t *fd = NULL; ++ UNLOCK(&inode->lock); + +- LOCK(&inode->lock); +- { +- fd = __fd_anonymous(inode, GF_ANON_FD_FLAGS); ++ if (ref) { ++ /* fd_allocate() doesn't get a reference from the inode. We need to ++ * take it here in case of success. */ ++ inode_ref(inode); + } +- UNLOCK(&inode->lock); + + return fd; + } + + fd_t * +-fd_anonymous_with_flags(inode_t *inode, int32_t flags) ++fd_anonymous(inode_t *inode) + { +- fd_t *fd = NULL; +- +- if (flags & O_DIRECT) +- flags = GF_ANON_FD_FLAGS | O_DIRECT; +- else +- flags = GF_ANON_FD_FLAGS; +- +- LOCK(&inode->lock); +- { +- fd = __fd_anonymous(inode, flags); +- } +- UNLOCK(&inode->lock); +- +- return fd; ++ return fd_anonymous_with_flags(inode, 0); + } + + fd_t * +-- +1.8.3.1 + diff --git a/SOURCES/0254-Detach-iot_worker-to-release-its-resources.patch b/SOURCES/0254-Detach-iot_worker-to-release-its-resources.patch new file mode 100644 index 0000000..6019436 --- /dev/null +++ b/SOURCES/0254-Detach-iot_worker-to-release-its-resources.patch @@ -0,0 +1,43 @@ +From 2bbb097d087bb5ef142775500708f11ccd31bac0 Mon Sep 17 00:00:00 2001 +From: Liguang Li <liguang.lee6@gmail.com> +Date: Fri, 21 Jun 2019 12:18:58 +0800 +Subject: [PATCH 254/255] Detach iot_worker to release its resources + +When iot_worker terminates, its resources have not been reaped, which +will consumes lots of memory. + +Detach iot_worker to automically release its resources back to the +system. 
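+
+As a minimal standalone sketch of the mechanism (not the io-threads code
+itself):
+
+    #include <pthread.h>
+
+    static void *worker(void *arg)
+    {
+        return arg; /* stack and thread descriptor reclaimed at exit */
+    }
+
+    int main(void)
+    {
+        pthread_t t;
+
+        if (pthread_create(&t, NULL, worker, NULL) == 0)
+            pthread_detach(t); /* what __iot_workers_scale() now does */
+        pthread_exit(NULL);    /* let the detached worker finish cleanly */
+    }
+
+Without the pthread_detach() (or a matching pthread_join()), every exited
+worker would stay joinable and its resources would be held until process
+exit.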
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22918/ + +>fixes: bz#1729107 +>Change-Id: I71fabb2940e76ad54dc56b4c41aeeead2644b8bb +>Signed-off-by: Liguang Li <liguang.lee6@gmail.com> + +BUG:1729108 +Change-Id: I71fabb2940e76ad54dc56b4c41aeeead2644b8bb +Signed-off-by: Liguang Li <liguang.lee6@gmail.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176106 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/performance/io-threads/src/io-threads.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c +index 060d923..1d96691 100644 +--- a/xlators/performance/io-threads/src/io-threads.c ++++ b/xlators/performance/io-threads/src/io-threads.c +@@ -835,6 +835,7 @@ __iot_workers_scale(iot_conf_t *conf) + ret = gf_thread_create(&thread, &conf->w_attr, iot_worker, conf, + "iotwr%03hx", conf->curr_count & 0x3ff); + if (ret == 0) { ++ pthread_detach(thread); + conf->curr_count++; + gf_msg_debug(conf->this->name, 0, + "scaled threads to %d (queue_size=%d/%d)", +-- +1.8.3.1 + diff --git a/SOURCES/0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch b/SOURCES/0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch new file mode 100644 index 0000000..89a8aaf --- /dev/null +++ b/SOURCES/0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch @@ -0,0 +1,104 @@ +From 684a4949552164d3469329b3f959de4369d54faa Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Sun, 14 Jul 2019 08:06:11 +0530 +Subject: [PATCH 255/255] Revert "tier/shd/glusterd: with shd mux, the shd + volfile path have to be updated for tier-heald.t" + +This reverts commit 6e7d333625ecd9f7402c2e839338350fa86eaf45. + +Updates: bz#1471742 +Change-Id: I6c27634999f72b5bbb35d5d13cdebda7af072b01 +Reviewed-on: https://code.engineering.redhat.com/gerrit/176017 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/tier/tier-heald.t | 35 +++++++++++++++-------------------- + 1 file changed, 15 insertions(+), 20 deletions(-) + +diff --git a/tests/basic/tier/tier-heald.t b/tests/basic/tier/tier-heald.t +index 0ec9e43..a8e634f 100644 +--- a/tests/basic/tier/tier-heald.t ++++ b/tests/basic/tier/tier-heald.t +@@ -11,7 +11,7 @@ cleanup; + TEST glusterd + TEST pidof glusterd + +-r2_volfile=$(gluster system:: getwd)"/vols/r2/r2-shd.vol" ++volfile=$(gluster system:: getwd)"/glustershd/glustershd-server.vol" + + # Commands should fail when both tiers are not of distribute type. 
+ # Glustershd shouldn't be running as long as there are no replicate/disperse +@@ -34,56 +34,51 @@ TEST $CLI volume tier r2 attach $H0:$B0/r2_hot + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 enable + EXPECT "enable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "enable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "enable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal r2 disable + EXPECT "disable" volume_option r2 "cluster.self-heal-daemon" +-EXPECT "disable" volgen_volume_option $r2_volfile r2-replicate-0 cluster replicate self-heal-daemon ++EXPECT "disable" volgen_volume_option $volfile r2-replicate-0 cluster replicate self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + # Commands should work on disperse volume. + TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2 + TEST $CLI volume start ec2 + +-ec2_volfile=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol" +- + TEST $CLI volume tier ec2 attach replica 2 $H0:$B0/ec2_hot{1..4} + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 enable + EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "enable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "enable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + TEST $CLI volume heal ec2 disable + EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon" +-EXPECT "disable" volgen_volume_option $ec2_volfile ec2-disperse-0 cluster disperse self-heal-daemon ++EXPECT "disable" volgen_volume_option $volfile ec2-disperse-0 cluster disperse self-heal-daemon + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid + + #Check that shd graph is rewritten correctly on volume stop/start +-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop r2 +-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse +- +-# Has been commented as the validations after stop using volfile dont hold true. +-#EXPECT "N" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile r2-replicate-0 cluster replicate + TEST $CLI volume stop ec2 + # When both the volumes are stopped glustershd volfile is not modified just the + # process is stopped + TEST "[ -z $(get_shd_process_pid) ]" + + TEST $CLI volume start r2 +-# Has been commented as the validations after stop using volfile dont hold true. 
+-#EXPECT "N" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $r2_volfile r2-replicate-0 cluster replicate ++EXPECT "N" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile r2-replicate-0 cluster replicate + + TEST $CLI volume start ec2 + +-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse +-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "Y" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate + + TEST $CLI volume tier ec2 detach force + +-EXPECT "Y" volgen_volume_exists $ec2_volfile ec2-disperse-0 cluster disperse +-EXPECT "N" volgen_volume_exists $ec2_volfile ec2-replicate-0 cluster replicate ++EXPECT "Y" volgen_volume_exists $volfile ec2-disperse-0 cluster disperse ++EXPECT "N" volgen_volume_exists $volfile ec2-replicate-0 cluster replicate + + TEST $CLI volume set r2 self-heal-daemon on + TEST $CLI volume set r2 cluster.self-heal-daemon off +-- +1.8.3.1 + diff --git a/SOURCES/0256-features-snapview-server-use-the-same-volfile-server.patch b/SOURCES/0256-features-snapview-server-use-the-same-volfile-server.patch new file mode 100644 index 0000000..d410373 --- /dev/null +++ b/SOURCES/0256-features-snapview-server-use-the-same-volfile-server.patch @@ -0,0 +1,117 @@ +From f90df1167bc70c634ba33c181232321da6770709 Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat <raghavendra@redhat.com> +Date: Tue, 25 Jun 2019 10:51:33 -0400 +Subject: [PATCH 256/261] features/snapview-server: use the same volfile server + for gfapi options + +snapview server xlator makes use of "localhost" as the volfile server while +initing the new glfs instance to talk to a snapshot. While localhost is fine, +better use the same volfile server that was used to start the snapshot +daemon containing the snapview-server xlator. + +Upstream Patch: +>Change-Id: I4485d39b0e3d066f481adc6958ace53ea33237f7 +>fixes: bz#1725211 +>Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com> +> patch: https://review.gluster.org/#/c/glusterfs/+/22974/ + +BUG: 1722757 +Change-Id: I4485d39b0e3d066f481adc6958ace53ea33237f7 +Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/175984 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../snapview-server/src/snapview-server-helpers.c | 44 ++++++++++++++++++++-- + .../snapview-server/src/snapview-server-messages.h | 2 +- + 2 files changed, 42 insertions(+), 4 deletions(-) + +diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c +index 5514a54..62c1dda 100644 +--- a/xlators/features/snapview-server/src/snapview-server-helpers.c ++++ b/xlators/features/snapview-server/src/snapview-server-helpers.c +@@ -476,6 +476,7 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name, + char logfile[PATH_MAX] = { + 0, + }; ++ char *volfile_server = NULL; + + GF_VALIDATE_OR_GOTO("snapview-server", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); +@@ -512,14 +513,50 @@ __svs_initialise_snapshot_volume(xlator_t *this, const char *name, + goto out; + } + +- ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007); ++ /* ++ * Before, localhost was used as the volfile server. 
But, with that ++ * method, accessing snapshots started giving ENOENT error if a ++ * specific bind address is mentioned in the glusterd volume file. ++ * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211. ++ * So, the new method is tried below, where, snapview-server first ++ * uses the volfile server used by the snapd (obtained from the ++ * command line arguments saved in the global context of the process). ++ * If the volfile server in global context is NULL, then localhost ++ * is tried (like before). ++ */ ++ if (this->ctx->cmd_args.volfile_server) { ++ volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server); ++ if (!volfile_server) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, ++ SVS_MSG_VOLFILE_SERVER_GET_FAIL, ++ "failed to copy volfile server %s. ", ++ this->ctx->cmd_args.volfile_server); ++ ret = -1; ++ goto out; ++ } ++ } else { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, ++ SVS_MSG_VOLFILE_SERVER_GET_FAIL, ++ "volfile server is NULL in cmd args. " ++ "Trying with localhost"); ++ volfile_server = gf_strdup("localhost"); ++ if (!volfile_server) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, ++ SVS_MSG_VOLFILE_SERVER_GET_FAIL, ++ "failed to copy volfile server localhost."); ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, local_errno, + SVS_MSG_SET_VOLFILE_SERVR_FAILED, + "setting the " +- "volfile server for snap volume %s " ++ "volfile server %s for snap volume %s " + "failed", +- dirent->name); ++ volfile_server, dirent->name); + goto out; + } + +@@ -561,6 +598,7 @@ out: + dirent->fs = fs; + } + ++ GF_FREE(volfile_server); + return fs; + } + +diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h +index 8548015..f634ab5 100644 +--- a/xlators/features/snapview-server/src/snapview-server-messages.h ++++ b/xlators/features/snapview-server/src/snapview-server-messages.h +@@ -49,6 +49,6 @@ GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED, + SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED, + SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED, + SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED, +- SVS_MSG_GLFS_INIT_FAILED); ++ SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED); + + #endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */ +-- +1.8.3.1 + diff --git a/SOURCES/0257-geo-rep-Test-case-for-upgrading-config-file.patch b/SOURCES/0257-geo-rep-Test-case-for-upgrading-config-file.patch new file mode 100644 index 0000000..ffe44f1 --- /dev/null +++ b/SOURCES/0257-geo-rep-Test-case-for-upgrading-config-file.patch @@ -0,0 +1,80 @@ +From ed6cd2b7674896c810fdd059e35a0d319aacb068 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Tue, 2 Jul 2019 15:00:25 +0530 +Subject: [PATCH 257/261] geo-rep: Test case for upgrading config file + +Added test case for the patch +https://review.gluster.org/#/c/glusterfs/+/22894/4 + +Also updated if else structure in gsyncdconfig.py to avoid +repeated occurance of values in new configfile. 
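+
+For illustration (key names taken from the test added below; the peer
+names are hypothetical), an old-format section such as:
+
+    [peers master slave]
+    use_tarssh = true
+    timeout = 1
+
+should now be upgraded with each key written exactly once:
+
+    [vars]
+    sync-method = tarssh
+    slave-timeout = 1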
+ +>fixes: bz#1707731 +>Change-Id: If97e1d37ac52dbd17d47be6cb659fc5a3ccab6d7 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22982/ + +Bug: 1708064 +Change-Id: If97e1d37ac52dbd17d47be6cb659fc5a3ccab6d7 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176603 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/gsyncdconfig.py | 11 +++++------ + tests/00-geo-rep/georep-basic-dr-rsync.t | 13 +++++++++++++ + 2 files changed, 18 insertions(+), 6 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index 7edc582..1fc451f 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -353,15 +353,14 @@ def config_upgrade(config_file, ret): + new_value = "tarssh" + else: + new_value = "rsync" +- config.set('vars', new_key, new_value) +- +- if key == "timeout": ++ config.set('vars', new_key, new_value) ++ elif key == "timeout": + new_key = "slave-timeout" + config.set('vars', new_key, value) +- + #for changes like: ignore_deletes to ignore-deletes +- new_key = key.replace("_", "-") +- config.set('vars', new_key, value) ++ else: ++ new_key = key.replace("_", "-") ++ config.set('vars', new_key, value) + + with open(config_file, 'w') as configfile: + config.write(configfile) +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index 428e9ed..b432635 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -212,6 +212,19 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave + #Verify arequal for whole volume + EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + ++#Test config upgrade BUG: 1707731 ++config_file=$GLUSTERD_WORKDIR/geo-replication/${GMV0}_${SH0}_${GSV0}/gsyncd.conf ++cat >> $config_file<<EOL ++[peers ${GMV0} ${GSV0}] ++use_tarssh = true ++timeout = 1 ++EOL ++TEST $GEOREP_CLI $master $slave stop ++TEST $GEOREP_CLI $master $slave start ++#verify that the config file is updated ++EXPECT "1" echo $(grep -Fc "vars" $config_file) ++EXPECT "1" echo $(grep -Fc "sync-method = tarssh" $config_file) ++EXPECT "1" echo $(grep -Fc "slave-timeout = 1" $config_file) + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave stop + +-- +1.8.3.1 + diff --git a/SOURCES/0258-geo-rep-Fix-mount-broker-setup-issue.patch b/SOURCES/0258-geo-rep-Fix-mount-broker-setup-issue.patch new file mode 100644 index 0000000..ba2a81f --- /dev/null +++ b/SOURCES/0258-geo-rep-Fix-mount-broker-setup-issue.patch @@ -0,0 +1,53 @@ +From de24d64c2599e48a05e8792b845cfecc210a6fc5 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 22 Jul 2019 17:35:21 +0530 +Subject: [PATCH 258/261] geo-rep: Fix mount broker setup issue + +The patch [1] added validation in gverify.sh to check if the gluster +binary on slave by executing gluster directly on slave. But for +non-root users, even though gluster binary is present, path is not +found when executed via ssh. Hence validate the gluster binary using +bash builtin 'type' command. 
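+
+For example, with a hypothetical unprivileged geo-rep user 'geoaccount':
+
+    ssh geoaccount@slavehost "gluster --version"  # executes gluster; can fail for non-root
+    ssh geoaccount@slavehost "type -p gluster"    # only resolves the binary, never runs it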
+ +[1] https://review.gluster.org/19224 + +Backport of: + > Patch: https://review.gluster.org/23089 + > Change-Id: I93ca62c0c5b1e16263e586ddbbca8407d60ca126 + > fixes: bz#1731920 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: I93ca62c0c5b1e16263e586ddbbca8407d60ca126 +BUG: 1720992 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176727 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + geo-replication/src/gverify.sh | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh +index 7c88f9f..692c1d6 100755 +--- a/geo-replication/src/gverify.sh ++++ b/geo-replication/src/gverify.sh +@@ -207,13 +207,13 @@ function main() + fi; + + if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then +- err=$((ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "gluster --version") 2>&1) ++ ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster" + else +- err=$((ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "gluster --version") 2>&1) ++ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster" + fi + + if [ $? -ne 0 ]; then +- echo "FORCE_BLOCKER|gluster command on $2@$3 failed. Error: $err" > $log_file ++ echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file + exit 1; + fi; + +-- +1.8.3.1 + diff --git a/SOURCES/0259-gluster-block-tuning-perf-options.patch b/SOURCES/0259-gluster-block-tuning-perf-options.patch new file mode 100644 index 0000000..a8ef9f4 --- /dev/null +++ b/SOURCES/0259-gluster-block-tuning-perf-options.patch @@ -0,0 +1,47 @@ +From 775a62906030e5b5dc60f17284a7d516ce4118f9 Mon Sep 17 00:00:00 2001 +From: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> +Date: Thu, 27 Jun 2019 13:18:32 +0530 +Subject: [PATCH 259/261] gluster-block: tuning perf options + +As per the perf experiment run by Elvir, with NVME devices used for OCP (gluster) +it was seen that particularly read operations (read/randread) benefited from +these options. 
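+
+These settings take effect together when the profile is applied to a
+volume, e.g. (volume name is hypothetical):
+
+    gluster volume set blockvol group gluster-block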
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22963/ + +>Change-Id: Ibec4b96afd28e6f7e757b6ef203ccdbc0d9854d5 +>Fixes: bz#1727852 +>Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> + +Change-Id: Ibec4b96afd28e6f7e757b6ef203ccdbc0d9854d5 +BUG: 1708180 +Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176724 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + extras/group-gluster-block | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/extras/group-gluster-block b/extras/group-gluster-block +index 56b406e..1e39801 100644 +--- a/extras/group-gluster-block ++++ b/extras/group-gluster-block +@@ -5,6 +5,14 @@ performance.stat-prefetch=off + performance.open-behind=off + performance.readdir-ahead=off + performance.strict-o-direct=on ++performance.client-io-threads=on ++performance.io-thread-count=32 ++performance.high-prio-threads=32 ++performance.normal-prio-threads=32 ++performance.low-prio-threads=32 ++performance.least-prio-threads=4 ++client.event-threads=8 ++server.event-threads=8 + network.remote-dio=disable + cluster.eager-lock=enable + cluster.quorum-type=auto +-- +1.8.3.1 + diff --git a/SOURCES/0260-ctime-Set-mdata-xattr-on-legacy-files.patch b/SOURCES/0260-ctime-Set-mdata-xattr-on-legacy-files.patch new file mode 100644 index 0000000..f07fb21 --- /dev/null +++ b/SOURCES/0260-ctime-Set-mdata-xattr-on-legacy-files.patch @@ -0,0 +1,885 @@ +From fc0903de1f7565e06db9d41e6dfd62221a745d24 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 24 Jun 2019 13:06:49 +0530 +Subject: [PATCH 260/261] ctime: Set mdata xattr on legacy files + +Problem: +The files which were created before ctime enabled would not +have "trusted.glusterfs.mdata"(stores time attributes) xattr. +Upon fops which modifies either ctime or mtime, the xattr +gets created with latest ctime, mtime and atime, which is +incorrect. It should update only the corresponding time +attribute and rest from backend + +Solution: +Creating xattr with values from brick is not possible as +each brick of replica set would have different times. +So create the xattr upon successful lookup if the xattr +is not created + +Note To Reviewers: +The time attributes used to set xattr is got from successful +lookup. Instead of sending the whole iatt over the wire via +setxattr, a structure called mdata_iatt is sent. The mdata_iatt +contains only time attributes. 
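+
+After the lookup-triggered setxattr has run, the new xattr can be
+verified directly on a brick backend, e.g. (brick path is hypothetical):
+
+    getfattr -n trusted.glusterfs.mdata -e hex /bricks/brick0/DIR/FILE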
+ +Backport of + > Patch: https://review.gluster.org/22936 + > Change-Id: I5e535631ddef04195361ae0364336410a2895dd4 + > fixes: bz#1593542 + +Change-Id: I5e535631ddef04195361ae0364336410a2895dd4 +BUG: 1715422 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176725 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + libglusterfs/src/dict.c | 59 ++++++++++ + libglusterfs/src/glusterfs/dict.h | 5 + + libglusterfs/src/glusterfs/glusterfs.h | 3 + + libglusterfs/src/glusterfs/iatt.h | 20 ++++ + libglusterfs/src/libglusterfs.sym | 3 + + rpc/xdr/src/glusterfs-fops.x | 1 + + rpc/xdr/src/glusterfs3.h | 59 ++++++++++ + rpc/xdr/src/glusterfs4-xdr.x | 12 ++ + rpc/xdr/src/libgfxdr.sym | 3 +- + tests/basic/ctime/ctime-mdata-legacy-files.t | 83 +++++++++++++ + xlators/features/utime/src/utime-messages.h | 3 +- + xlators/features/utime/src/utime.c | 154 ++++++++++++++++++++++--- + xlators/storage/posix/src/posix-inode-fd-ops.c | 17 +++ + xlators/storage/posix/src/posix-messages.h | 3 +- + xlators/storage/posix/src/posix-metadata.c | 103 ++++++++++------- + xlators/storage/posix/src/posix-metadata.h | 4 + + 16 files changed, 475 insertions(+), 57 deletions(-) + create mode 100644 tests/basic/ctime/ctime-mdata-legacy-files.t + +diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c +index 6917df9..d8cdda4 100644 +--- a/libglusterfs/src/dict.c ++++ b/libglusterfs/src/dict.c +@@ -124,6 +124,7 @@ int32_t + is_data_equal(data_t *one, data_t *two) + { + struct iatt *iatt1, *iatt2; ++ struct mdata_iatt *mdata_iatt1, *mdata_iatt2; + + if (!one || !two || !one->data || !two->data) { + gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +@@ -188,6 +189,24 @@ is_data_equal(data_t *one, data_t *two) + */ + return 1; + } ++ if (one->data_type == GF_DATA_TYPE_MDATA) { ++ if ((one->len < sizeof(struct mdata_iatt)) || ++ (two->len < sizeof(struct mdata_iatt))) { ++ return 0; ++ } ++ mdata_iatt1 = (struct mdata_iatt *)one->data; ++ mdata_iatt2 = (struct mdata_iatt *)two->data; ++ ++ if (mdata_iatt1->ia_atime != mdata_iatt2->ia_atime || ++ mdata_iatt1->ia_mtime != mdata_iatt2->ia_mtime || ++ mdata_iatt1->ia_ctime != mdata_iatt2->ia_ctime || ++ mdata_iatt1->ia_atime_nsec != mdata_iatt2->ia_atime_nsec || ++ mdata_iatt1->ia_mtime_nsec != mdata_iatt2->ia_mtime_nsec || ++ mdata_iatt1->ia_ctime_nsec != mdata_iatt2->ia_ctime_nsec) { ++ return 0; ++ } ++ return 1; ++ } + + if (one->len != two->len) + return 0; +@@ -1078,6 +1097,7 @@ static char *data_type_name[GF_DATA_TYPE_MAX] = { + [GF_DATA_TYPE_PTR] = "pointer", + [GF_DATA_TYPE_GFUUID] = "gf-uuid", + [GF_DATA_TYPE_IATT] = "iatt", ++ [GF_DATA_TYPE_MDATA] = "mdata", + }; + + int64_t +@@ -2666,6 +2686,45 @@ err: + } + + int ++dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata, ++ bool is_static) ++{ ++ return dict_set_bin_common(this, key, mdata, sizeof(struct mdata_iatt), ++ is_static, GF_DATA_TYPE_MDATA); ++} ++ ++int ++dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata) ++{ ++ data_t *data = NULL; ++ int ret = -EINVAL; ++ ++ if (!this || !key || !mdata) { ++ goto err; ++ } ++ ret = dict_get_with_ref(this, key, &data); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_MDATA, key, -EINVAL); ++ if (data->len < sizeof(struct mdata_iatt)) { ++ gf_msg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF, ++ "data value for '%s' is 
smaller than expected", key); ++ ret = -ENOBUFS; ++ goto err; ++ } ++ ++ memcpy(mdata, data->data, min(data->len, sizeof(struct mdata_iatt))); ++ ++err: ++ if (data) ++ data_unref(data); ++ ++ return ret; ++} ++ ++int + dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static) + { + return dict_set_bin_common(this, key, iatt, sizeof(struct iatt), is_static, +diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h +index 022f564..8239c7a 100644 +--- a/libglusterfs/src/glusterfs/dict.h ++++ b/libglusterfs/src/glusterfs/dict.h +@@ -392,6 +392,11 @@ GF_MUST_CHECK int + dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static); + GF_MUST_CHECK int + dict_get_iatt(dict_t *this, char *key, struct iatt *iatt); ++GF_MUST_CHECK int ++dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata, ++ bool is_static); ++GF_MUST_CHECK int ++dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata); + + void + dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain); +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 2cedf1a..79c93ae 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -229,6 +229,9 @@ enum gf_internal_fop_indicator { + #define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup" + #define QUOTA_READ_ONLY_KEY "trusted.glusterfs.quota.read-only" + ++/* ctime related */ ++#define CTIME_MDATA_XDATA_KEY "set-ctime-mdata" ++ + /* afr related */ + #define AFR_XATTR_PREFIX "trusted.afr" + +diff --git a/libglusterfs/src/glusterfs/iatt.h b/libglusterfs/src/glusterfs/iatt.h +index bee7a0a..f03d68b 100644 +--- a/libglusterfs/src/glusterfs/iatt.h ++++ b/libglusterfs/src/glusterfs/iatt.h +@@ -92,6 +92,15 @@ struct old_iatt { + uint32_t ia_ctime_nsec; + }; + ++struct mdata_iatt { ++ int64_t ia_atime; /* last access time */ ++ int64_t ia_mtime; /* last modification time */ ++ int64_t ia_ctime; /* last status change time */ ++ uint32_t ia_atime_nsec; ++ uint32_t ia_mtime_nsec; ++ uint32_t ia_ctime_nsec; ++}; ++ + /* 64-bit mask for valid members in struct iatt. 
*/ + #define IATT_TYPE 0x0000000000000001U + #define IATT_MODE 0x0000000000000002U +@@ -313,6 +322,17 @@ st_mode_from_ia(ia_prot_t prot, ia_type_t type) + return st_mode; + } + ++static inline void ++iatt_to_mdata(struct mdata_iatt *mdata, struct iatt *iatt) ++{ ++ mdata->ia_atime = iatt->ia_atime; ++ mdata->ia_atime_nsec = iatt->ia_atime_nsec; ++ mdata->ia_mtime = iatt->ia_mtime; ++ mdata->ia_mtime_nsec = iatt->ia_mtime_nsec; ++ mdata->ia_ctime = iatt->ia_ctime; ++ mdata->ia_ctime_nsec = iatt->ia_ctime_nsec; ++} ++ + static inline int + iatt_from_stat(struct iatt *iatt, struct stat *stat) + { +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 4dca7de..b161380 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -380,6 +380,7 @@ dict_get_bin + dict_get_double + dict_get_gfuuid + dict_get_iatt ++dict_get_mdata + dict_get_int16 + dict_get_int32 + dict_get_int32n +@@ -417,6 +418,7 @@ dict_set_dynstrn + dict_set_dynstr_with_alloc + dict_set_gfuuid + dict_set_iatt ++dict_set_mdata + dict_set_int16 + dict_set_int32 + dict_set_int32n +@@ -509,6 +511,7 @@ fop_lease_stub + fop_link_stub + fop_lk_stub + fop_log_level ++fop_lookup_cbk_stub + fop_lookup_stub + fop_mkdir_stub + fop_mknod_stub +diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x +index bacf0773..651f8de 100644 +--- a/rpc/xdr/src/glusterfs-fops.x ++++ b/rpc/xdr/src/glusterfs-fops.x +@@ -245,5 +245,6 @@ enum gf_dict_data_type_t { + GF_DATA_TYPE_PTR, + GF_DATA_TYPE_GFUUID, + GF_DATA_TYPE_IATT, ++ GF_DATA_TYPE_MDATA, + GF_DATA_TYPE_MAX + }; +diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h +index 5521f4d..86b3a4c 100644 +--- a/rpc/xdr/src/glusterfs3.h ++++ b/rpc/xdr/src/glusterfs3.h +@@ -585,6 +585,34 @@ out: + } + + static inline void ++gfx_mdata_iatt_to_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt, ++ struct mdata_iatt *mdata_iatt) ++{ ++ if (!mdata_iatt || !gf_mdata_iatt) ++ return; ++ mdata_iatt->ia_atime = gf_mdata_iatt->ia_atime; ++ mdata_iatt->ia_atime_nsec = gf_mdata_iatt->ia_atime_nsec; ++ mdata_iatt->ia_mtime = gf_mdata_iatt->ia_mtime; ++ mdata_iatt->ia_mtime_nsec = gf_mdata_iatt->ia_mtime_nsec; ++ mdata_iatt->ia_ctime = gf_mdata_iatt->ia_ctime; ++ mdata_iatt->ia_ctime_nsec = gf_mdata_iatt->ia_ctime_nsec; ++} ++ ++static inline void ++gfx_mdata_iatt_from_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt, ++ struct mdata_iatt *mdata_iatt) ++{ ++ if (!mdata_iatt || !gf_mdata_iatt) ++ return; ++ gf_mdata_iatt->ia_atime = mdata_iatt->ia_atime; ++ gf_mdata_iatt->ia_atime_nsec = mdata_iatt->ia_atime_nsec; ++ gf_mdata_iatt->ia_mtime = mdata_iatt->ia_mtime; ++ gf_mdata_iatt->ia_mtime_nsec = mdata_iatt->ia_mtime_nsec; ++ gf_mdata_iatt->ia_ctime = mdata_iatt->ia_ctime; ++ gf_mdata_iatt->ia_ctime_nsec = mdata_iatt->ia_ctime_nsec; ++} ++ ++static inline void + gfx_stat_to_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt) + { + if (!iatt || !gf_stat) +@@ -721,6 +749,12 @@ dict_to_xdr(dict_t *this, gfx_dict *dict) + gfx_stat_from_iattx(&xpair->value.gfx_value_u.iatt, + (struct iatt *)dpair->value->data); + break; ++ case GF_DATA_TYPE_MDATA: ++ index++; ++ gfx_mdata_iatt_from_mdata_iatt( ++ &xpair->value.gfx_value_u.mdata_iatt, ++ (struct mdata_iatt *)dpair->value->data); ++ break; + case GF_DATA_TYPE_GFUUID: + index++; + memcpy(&xpair->value.gfx_value_u.uuid, dpair->value->data, +@@ -787,6 +821,7 @@ xdr_to_dict(gfx_dict *dict, dict_t **to) + dict_t *this = NULL; + unsigned char *uuid = NULL; + struct iatt *iatt = NULL; ++ 
struct mdata_iatt *mdata_iatt = NULL; + + if (!to || !dict) + goto out; +@@ -854,6 +889,30 @@ xdr_to_dict(gfx_dict *dict, dict_t **to) + gfx_stat_to_iattx(&xpair->value.gfx_value_u.iatt, iatt); + ret = dict_set_iatt(this, key, iatt, false); + break; ++ case GF_DATA_TYPE_MDATA: ++ mdata_iatt = GF_CALLOC(1, sizeof(struct mdata_iatt), ++ gf_common_mt_char); ++ if (!mdata_iatt) { ++ errno = ENOMEM; ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, ++ "failed to allocate memory. key: %s", key); ++ ret = -1; ++ goto out; ++ } ++ gfx_mdata_iatt_to_mdata_iatt( ++ &xpair->value.gfx_value_u.mdata_iatt, mdata_iatt); ++ ret = dict_set_mdata(this, key, mdata_iatt, false); ++ if (ret != 0) { ++ GF_FREE(mdata_iatt); ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, ++ LG_MSG_DICT_SET_FAILED, ++ "failed to set the key (%s)" ++ " into dict", ++ key); ++ ret = -1; ++ goto out; ++ } ++ break; + case GF_DATA_TYPE_PTR: + case GF_DATA_TYPE_STR_OLD: + value = GF_MALLOC(xpair->value.gfx_value_u.other.other_len + 1, +diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x +index bec0872..6f92b70 100644 +--- a/rpc/xdr/src/glusterfs4-xdr.x ++++ b/rpc/xdr/src/glusterfs4-xdr.x +@@ -46,6 +46,16 @@ struct gfx_iattx { + unsigned int mode; /* type of file and rwx mode */ + }; + ++struct gfx_mdata_iatt { ++ hyper ia_atime; /* last access time */ ++ hyper ia_mtime; /* last modification time */ ++ hyper ia_ctime; /* last status change time */ ++ ++ unsigned int ia_atime_nsec; ++ unsigned int ia_mtime_nsec; ++ unsigned int ia_ctime_nsec; ++}; ++ + union gfx_value switch (gf_dict_data_type_t type) { + case GF_DATA_TYPE_INT: + hyper value_int; +@@ -62,6 +72,8 @@ union gfx_value switch (gf_dict_data_type_t type) { + case GF_DATA_TYPE_PTR: + case GF_DATA_TYPE_STR_OLD: + opaque other<>; ++ case GF_DATA_TYPE_MDATA: ++ gfx_mdata_iatt mdata_iatt; + }; + + /* AUTH */ +diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym +index 22cdf30..dd4ac85 100644 +--- a/rpc/xdr/src/libgfxdr.sym ++++ b/rpc/xdr/src/libgfxdr.sym +@@ -251,6 +251,7 @@ xdr_to_write3args + xdr_vector_round_up + xdr_gfx_read_rsp + xdr_gfx_iattx ++xdr_gfx_mdata_iatt + xdr_gfx_value + xdr_gfx_dict_pair + xdr_gfx_dict +@@ -344,4 +345,4 @@ xdr_compound_req_v2 + xdr_gfx_compound_req + xdr_compound_rsp_v2 + xdr_gfx_compound_rsp +-xdr_gfx_copy_file_range_req +\ No newline at end of file ++xdr_gfx_copy_file_range_req +diff --git a/tests/basic/ctime/ctime-mdata-legacy-files.t b/tests/basic/ctime/ctime-mdata-legacy-files.t +new file mode 100644 +index 0000000..2e782d5 +--- /dev/null ++++ b/tests/basic/ctime/ctime-mdata-legacy-files.t +@@ -0,0 +1,83 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++cleanup; ++ ++############################################################################### ++#Replica volume ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr ++TEST $CLI volume set $V0 ctime off ++ ++TEST "mkdir $M0/DIR" ++TEST "echo hello_world > $M0/DIR/FILE" ++ ++#Verify absence of xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE" ++ ++#Enable ctime ++TEST $CLI volume set $V0 ctime on ++sleep 3 ++TEST stat $M0/DIR/FILE ++ ++#Verify presence "trusted.glusterfs.mdata" xattr on backend ++#The lookup above should have created xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE" ++ ++############################################################################### ++#Disperse Volume ++ ++TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++TEST $CLI volume set $V1 performance.stat-prefetch off ++TEST $CLI volume start $V1 ++ ++TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M1; ++ ++#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr ++TEST $CLI volume set $V1 ctime off ++TEST "mkdir $M1/DIR" ++TEST "echo hello_world > $M1/DIR/FILE" ++ ++#Verify absence of xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE" ++ ++#Enable ctime ++TEST $CLI volume set $V1 ctime on ++sleep 3 ++TEST stat $M1/DIR/FILE ++ ++#Verify presence "trusted.glusterfs.mdata" xattr on backend ++#The lookup above should have created xattr 
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE" ++ ++cleanup; ++############################################################################### +diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h +index bac18ab..bd40265 100644 +--- a/xlators/features/utime/src/utime-messages.h ++++ b/xlators/features/utime/src/utime-messages.h +@@ -23,6 +23,7 @@ + * glfs-message-id.h. + */ + +-GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY); ++GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED, ++ UTIME_MSG_DICT_SET_FAILED); + + #endif /* __UTIME_MESSAGES_H__ */ +diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c +index 877c751..2a986e7 100644 +--- a/xlators/features/utime/src/utime.c ++++ b/xlators/features/utime/src/utime.c +@@ -9,8 +9,10 @@ + */ + + #include "utime.h" ++#include "utime-helpers.h" + #include "utime-messages.h" + #include "utime-mem-types.h" ++#include <glusterfs/call-stub.h> + + int32_t + gf_utime_invalidate(xlator_t *this, inode_t *inode) +@@ -133,6 +135,124 @@ mem_acct_init(xlator_t *this) + } + + int32_t ++gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int op_ret, int op_errno, ++ dict_t *xdata) ++{ ++ /* Don't fail lookup if mdata setxattr fails */ ++ if (op_ret) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED, ++ "dict set of key for set-ctime-mdata failed"); ++ } ++ call_resume(frame->local); ++ return 0; ++} ++ ++int32_t ++gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *stbuf, dict_t *xdata, ++ struct iatt *postparent) ++{ ++ dict_t *dict = NULL; ++ struct mdata_iatt *mdata = NULL; ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ ++ if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) { ++ dict = dict_new(); ++ if (!dict) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char); ++ if (mdata == NULL) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ iatt_to_mdata(mdata, stbuf); ++ ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, ++ "dict set of key for set-ctime-mdata failed"); ++ goto err; ++ } ++ frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk, op_ret, ++ op_errno, inode, stbuf, xdata, ++ postparent); ++ if (!frame->local) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY, ++ "lookup_cbk stub allocation failed"); ++ goto stub_err; ++ } ++ ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, stbuf->ia_gfid); ++ STACK_WIND(frame, gf_utime_set_mdata_setxattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, &loc, dict, 0, NULL); ++ ++ 
dict_unref(dict); ++ inode_unref(loc.inode); ++ return 0; ++ } ++ ++ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata, ++ postparent); ++ return 0; ++ ++err: ++ if (mdata) { ++ GF_FREE(mdata); ++ } ++stub_err: ++ if (dict) { ++ dict_unref(dict); ++ } ++ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); ++ return 0; ++} ++ ++int ++gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) ++{ ++ int op_errno = -1; ++ int ret = -1; ++ ++ VALIDATE_OR_GOTO(frame, err); ++ VALIDATE_OR_GOTO(this, err); ++ VALIDATE_OR_GOTO(loc, err); ++ VALIDATE_OR_GOTO(loc->inode, err); ++ ++ xdata = xdata ? dict_ref(xdata) : dict_new(); ++ if (!xdata) { ++ op_errno = ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED, ++ "%s: Unable to set dict value for %s", loc->path, ++ GF_XATTR_MDATA_KEY); ++ op_errno = -ret; ++ goto free_dict; ++ } ++ ++ STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xdata); ++ dict_unref(xdata); ++ return 0; ++ ++free_dict: ++ dict_unref(xdata); ++err: ++ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); ++ return 0; ++} ++ ++int32_t + init(xlator_t *this) + { + utime_priv_t *utime = NULL; +@@ -182,19 +302,27 @@ notify(xlator_t *this, int event, void *data, ...) + } + + struct xlator_fops fops = { +- /* TODO: Need to go through other fops and +- * check if they modify time attributes +- */ +- .rename = gf_utime_rename, .mknod = gf_utime_mknod, +- .readv = gf_utime_readv, .fremovexattr = gf_utime_fremovexattr, +- .open = gf_utime_open, .create = gf_utime_create, +- .mkdir = gf_utime_mkdir, .writev = gf_utime_writev, +- .rmdir = gf_utime_rmdir, .fallocate = gf_utime_fallocate, +- .truncate = gf_utime_truncate, .symlink = gf_utime_symlink, +- .zerofill = gf_utime_zerofill, .link = gf_utime_link, +- .ftruncate = gf_utime_ftruncate, .unlink = gf_utime_unlink, +- .setattr = gf_utime_setattr, .fsetattr = gf_utime_fsetattr, +- .opendir = gf_utime_opendir, .removexattr = gf_utime_removexattr, ++ .rename = gf_utime_rename, ++ .mknod = gf_utime_mknod, ++ .readv = gf_utime_readv, ++ .fremovexattr = gf_utime_fremovexattr, ++ .open = gf_utime_open, ++ .create = gf_utime_create, ++ .mkdir = gf_utime_mkdir, ++ .writev = gf_utime_writev, ++ .rmdir = gf_utime_rmdir, ++ .fallocate = gf_utime_fallocate, ++ .truncate = gf_utime_truncate, ++ .symlink = gf_utime_symlink, ++ .zerofill = gf_utime_zerofill, ++ .link = gf_utime_link, ++ .ftruncate = gf_utime_ftruncate, ++ .unlink = gf_utime_unlink, ++ .setattr = gf_utime_setattr, ++ .fsetattr = gf_utime_fsetattr, ++ .opendir = gf_utime_opendir, ++ .removexattr = gf_utime_removexattr, ++ .lookup = gf_utime_lookup, + }; + struct xlator_cbks cbks = { + .invalidate = gf_utime_invalidate, +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index ea3b69c..d22bbc2 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -2625,6 +2625,9 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + gf_cs_obj_state state = -1; + int i = 0; + int len; ++ struct mdata_iatt mdata_iatt = { ++ 0, ++ }; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); +@@ -2638,6 +2641,20 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t 
*dict, + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + ++ ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); ++ if (ret == 0) { ++ /* This is initiated by lookup when ctime feature is enabled to create ++ * "trusted.glusterfs.mdata" xattr if not present. These are the files ++ * which were created when ctime feature is disabled. ++ */ ++ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt, ++ &op_errno); ++ if (ret != 0) { ++ op_ret = -1; ++ } ++ goto out; ++ } ++ + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; +diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h +index 3229275..15e23ff 100644 +--- a/xlators/storage/posix/src/posix-messages.h ++++ b/xlators/storage/posix/src/posix-messages.h +@@ -68,6 +68,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED, + P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED, + P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED, + P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED, +- P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED); ++ P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED, ++ P_MSG_NOMEM); + + #endif /* !_GLUSTERD_MESSAGES_H_ */ +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 5a5e6cd..647c0bb 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -245,6 +245,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + if (ret == -1 || !mdata) { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, ++ "Could not allocate mdata. file: %s: gfid: %s", ++ real_path ? real_path : "null", ++ inode ? uuid_utoa(inode->gfid) : "null"); + ret = -1; + goto out; + } +@@ -262,18 +266,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + } + } else { + /* Failed to get mdata from disk, xattr missing. +- * This happens on two cases. +- * 1. File is created before ctime is enabled. +- * 2. On new file creation. +- * +- * Do nothing, just return success. It is as +- * good as ctime feature is not enabled for this +- * file. For files created before ctime is enabled, +- * time attributes gets updated into ctime structure +- * once the metadata modification fop happens and +- * time attributes become consistent eventually. +- * For new files, it would obviously get updated +- * before the fop completion. ++ * This happens when the file is created before ++ * ctime is enabled. + */ + if (stbuf && op_errno != ENOENT) { + ret = 0; +@@ -345,6 +339,54 @@ posix_compare_timespec(struct timespec *first, struct timespec *second) + return first->tv_sec - second->tv_sec; + } + ++int ++posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, ++ struct mdata_iatt *mdata_iatt, int *op_errno) ++{ ++ posix_mdata_t *mdata = NULL; ++ int ret = 0; ++ ++ GF_VALIDATE_OR_GOTO("posix", this, out); ++ GF_VALIDATE_OR_GOTO(this->name, inode, out); ++ ++ LOCK(&inode->lock); ++ { ++ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); ++ if (!mdata) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, ++ "Could not allocate mdata. 
gfid: %s", ++ uuid_utoa(inode->gfid)); ++ ret = -1; ++ *op_errno = ENOMEM; ++ goto unlock; ++ } ++ ++ mdata->version = 1; ++ mdata->flags = 0; ++ mdata->ctime.tv_sec = mdata_iatt->ia_ctime; ++ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; ++ mdata->atime.tv_sec = mdata_iatt->ia_atime; ++ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec; ++ mdata->mtime.tv_sec = mdata_iatt->ia_mtime; ++ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; ++ ++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ ++ ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, ++ "gfid: %s key:%s ", uuid_utoa(inode->gfid), ++ GF_XATTR_MDATA_KEY); ++ *op_errno = errno; ++ goto unlock; ++ } ++ } ++unlock: ++ UNLOCK(&inode->lock); ++out: ++ return ret; ++} ++ + /* posix_set_mdata_xattr updates the posix_mdata_t based on the flag + * in inode context and stores it on disk + */ +@@ -372,6 +414,9 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + */ + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, ++ "Could not allocate mdata. file: %s: gfid: %s", ++ real_path ? real_path : "null", uuid_utoa(inode->gfid)); + ret = -1; + goto unlock; + } +@@ -386,35 +431,11 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + } else { + /* +- * This is the first time creating the time +- * attr. This happens when you activate this +- * feature, and the legacy file will not have +- * any xattr set. +- * +- * New files will create extended attributes. +- */ +- +- /* +- * TODO: This is wrong approach, because before +- * creating fresh xattr, we should consult +- * to all replica and/or distribution set. +- * +- * We should contact the time management +- * xlators, and ask them to create an xattr. +- */ +- /* We should not be relying on backend file's +- * time attributes to load the initial ctime +- * time attribute structure. This is incorrect +- * as each replica set would have witnessed the +- * file creation at different times. +- * +- * For new file creation, ctime, atime and mtime +- * should be same, hence initiate the ctime +- * structure with the time from the frame. But +- * for the files which were created before ctime +- * feature is enabled, this is not accurate but +- * still fine as the times would get eventually +- * accurate. ++ * This is the first time creating the time attr. This happens ++ * when you activate this feature. On this code path, only new ++ * files will create mdata xattr. The legacy files (files ++ * created before ctime enabled) will not have any xattr set. ++ * The xattr on legacy file will be set via lookup. 
+ */ + + /* Don't create xattr with utimes/utimensat, only update if +diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h +index 3416148..dc25e59 100644 +--- a/xlators/storage/posix/src/posix-metadata.h ++++ b/xlators/storage/posix/src/posix-metadata.h +@@ -53,5 +53,9 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *read_path_put, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out); ++int ++posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, ++ struct mdata_iatt *mdata_iatt, ++ int *op_errno); + + #endif /* _POSIX_METADATA_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0261-features-utime-Fix-mem_put-crash.patch b/SOURCES/0261-features-utime-Fix-mem_put-crash.patch new file mode 100644 index 0000000..2c3fe9e --- /dev/null +++ b/SOURCES/0261-features-utime-Fix-mem_put-crash.patch @@ -0,0 +1,52 @@ +From 1aa175f353325775517daf1d48a19799e0cafc7a Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 22 Jul 2019 20:55:33 +0530 +Subject: [PATCH 261/261] features/utime: Fix mem_put crash + +Problem: +When frame->local is not null FRAME_DESTROY calls mem_put on it. +Since the stub is already destroyed in call_resume(), it leads +to crash + +Fix: +Set frame->local to NULL before calling call_resume() + +Backport of: + > Patch: https://review.gluster.org/23091 + > fixes: bz#1593542 + > Change-Id: I0f8adf406f4cefdb89d7624ba7a9d9c2eedfb1de + > Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1715422 +Change-Id: I0f8adf406f4cefdb89d7624ba7a9d9c2eedfb1de +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/176726 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/features/utime/src/utime.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c +index 2a986e7..e3a80b6 100644 +--- a/xlators/features/utime/src/utime.c ++++ b/xlators/features/utime/src/utime.c +@@ -139,12 +139,14 @@ gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + dict_t *xdata) + { ++ call_stub_t *stub = frame->local; + /* Don't fail lookup if mdata setxattr fails */ + if (op_ret) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED, + "dict set of key for set-ctime-mdata failed"); + } +- call_resume(frame->local); ++ frame->local = NULL; ++ call_resume(stub); + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0262-glusterd-ctime-Disable-ctime-by-default.patch b/SOURCES/0262-glusterd-ctime-Disable-ctime-by-default.patch new file mode 100644 index 0000000..95adf23 --- /dev/null +++ b/SOURCES/0262-glusterd-ctime-Disable-ctime-by-default.patch @@ -0,0 +1,78 @@ +From c44c9f2003b703d64a2a06c53f5a2b85e9dc7a11 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Wed, 31 Jul 2019 09:23:42 -0400 +Subject: [PATCH 262/262] glusterd/ctime: Disable ctime by default + +The ctime feature, in combination with gfid2path +causes peformance dip on rename workloads. Hence +disabling the feature by default. 
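+
+Deployments that want consistent time attributes can still opt back in
+per volume:
+
+    gluster volume set <VOLNAME> ctime on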
+ +Change-Id: I280527eea4dc19bba39fb6a5e74760823a056dc9 +Label : DOWNSTREAM ONLY +BUG: 1713890 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177421 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-by: Amar Tumballi Suryanarayan <amarts@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 10 +++++++--- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + xlators/storage/posix/src/posix-common.c | 2 +- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 5e0214e..539e8a5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4382,14 +4382,18 @@ client_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + goto out; + } + } +- /* a. ret will be -1 if features.ctime is not set in the volinfo->dict which +- * means ctime should be loaded into the graph. ++ /* a. ret will be 0 (returned default) if features.ctime is not set ++ * in the volinfo->dict which means ctime should not be loaded into ++ * the graph. It is disabled by default. + * b. ret will be 1 if features.ctime is explicitly turned on through + * volume set and in that case ctime should be loaded into the graph. + * c. ret will be 0 if features.ctime is explicitly turned off and in that + * case ctime shouldn't be loaded into the graph. + */ +- ret = dict_get_str_boolean(set_dict, "features.ctime", -1); ++ ret = dict_get_str_boolean(set_dict, "features.ctime", 0); ++ if (ret == -1) ++ goto out; ++ + if (conf->op_version >= GD_OP_VERSION_5_0 && ret) { + xl = volgen_graph_add(graph, "features/utime", volname); + if (!xl) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 7a83124..8ce338e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3680,7 +3680,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "features.ctime", + .voltype = "features/utime", + .validate_fn = validate_boolean, +- .value = "on", ++ .value = "off", + .option = "!utime", + .op_version = GD_OP_VERSION_4_1_0, + .description = "enable/disable utime translator on the volume.", +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index bfe2cb0..d738692 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -1374,7 +1374,7 @@ struct volume_options posix_options[] = { + "SHA256 checksum. 
MD5 otherwise."}, + {.key = {"ctime"}, + .type = GF_OPTION_TYPE_BOOL, +- .default_value = "on", ++ .default_value = "off", + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .op_version = {GD_OP_VERSION_4_1_0}, + .tags = {"ctime"}, +-- +1.8.3.1 + diff --git a/SOURCES/0263-tests-fix-ctime-related-tests.patch b/SOURCES/0263-tests-fix-ctime-related-tests.patch new file mode 100644 index 0000000..3c8fabb --- /dev/null +++ b/SOURCES/0263-tests-fix-ctime-related-tests.patch @@ -0,0 +1,75 @@ +From 427dab431f7e8c4c8a01e9e9ed0892708a3d22d2 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 5 Aug 2019 08:33:29 +0530 +Subject: [PATCH 263/265] tests: fix ctime related tests + +With ctime being disabled by default, certain tests need to explicitly +turn this option off to sanitize the functionality + +Label: DOWNSTREAM ONLY + +Change-Id: Id70310b4b09e36bf66756fea447186bb073b5604 +BUG: 1704562 +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177704 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/ctime/ctime-noatime.t | 1 + + tests/basic/ctime/ctime-readdir.t | 1 + + tests/bugs/glusterd/bug-1696046.t | 8 +++++--- + 3 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/tests/basic/ctime/ctime-noatime.t b/tests/basic/ctime/ctime-noatime.t +index 609ccbd..a6c8d9c 100644 +--- a/tests/basic/ctime/ctime-noatime.t ++++ b/tests/basic/ctime/ctime-noatime.t +@@ -20,6 +20,7 @@ function atime_compare { + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 ctime on + TEST $CLI volume set $V0 performance.stat-prefetch off + TEST $CLI volume set $V0 performance.read-ahead off + TEST $CLI volume set $V0 performance.quick-read off +diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t +index 4564fc1..fa069b3 100644 +--- a/tests/basic/ctime/ctime-readdir.t ++++ b/tests/basic/ctime/ctime-readdir.t +@@ -9,6 +9,7 @@ TEST glusterd + + TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3}; + TEST $CLI volume set $V0 performance.stat-prefetch on ++TEST $CLI volume set $V0 ctime on + TEST $CLI volume set $V0 performance.readdir-ahead off + TEST $CLI volume start $V0; + +diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t +index e1c1eb2..f7992f5 100644 +--- a/tests/bugs/glusterd/bug-1696046.t ++++ b/tests/bugs/glusterd/bug-1696046.t +@@ -22,6 +22,8 @@ TEST pidof glusterd; + TEST $CLI volume set all cluster.brick-multiplex on + TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3}; + TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3}; ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume set $V1 ctime on + + ## Start volume and verify + TEST $CLI volume start $V0; +@@ -64,9 +66,9 @@ TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG + # Do some operation + touch $M0/file1 + +-# Check debug message debug message should be exist only for V0 +-# Server xlator is common in brick_mux so after enabling DEBUG log +-# some debug message should be available for other xlators like posix ++# Check debug message should exist only for V0 server xlator is common in ++# brick_mux so after enabling DEBUG log some debug message should be available ++# for other xlators like posix + + brick_log_file=$logdir"/bricks/"`brick-log-file-name` + nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l) 
+-- +1.8.3.1 + diff --git a/SOURCES/0264-gfapi-Fix-deadlock-while-processing-upcall.patch b/SOURCES/0264-gfapi-Fix-deadlock-while-processing-upcall.patch new file mode 100644 index 0000000..41ac9ee --- /dev/null +++ b/SOURCES/0264-gfapi-Fix-deadlock-while-processing-upcall.patch @@ -0,0 +1,259 @@ +From 52dc121c412de9c1cc3058d782b949dc7b25dc3e Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Thu, 25 Jul 2019 12:56:12 +0530 +Subject: [PATCH 264/265] gfapi: Fix deadlock while processing upcall + +As mentioned in bug1733166, there could be potential deadlock +while processing upcalls depending on how each xlator choose +to act on it. The right way of fixing such issues +is to change rpc callback communication process. +- https://github.com/gluster/glusterfs/issues/697 + +Till then, making changes in gfapi layer to avoid any I/O +processing. + +This is backport of below mainline patch +> https://review.gluster.org/#/c/glusterfs/+/23108/ +> bz#1733166 +> https://review.gluster.org/#/c/glusterfs/+/23107/ (release-6) + +Change-Id: I2079e95339e5d761d5060707f4555cfacab95c83 +fixes: bz#1733520 +Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177675 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + api/src/glfs-fops.c | 164 +++++++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 131 insertions(+), 33 deletions(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 396f18c..e6adea5 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -34,7 +34,7 @@ + + struct upcall_syncop_args { + struct glfs *fs; +- struct glfs_upcall *up_arg; ++ struct gf_upcall upcall_data; + }; + + #define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1) +@@ -5716,8 +5716,28 @@ out: + static int + upcall_syncop_args_free(struct upcall_syncop_args *args) + { +- if (args && args->up_arg) +- GLFS_FREE(args->up_arg); ++ dict_t *dict = NULL; ++ struct gf_upcall *upcall_data = NULL; ++ ++ if (args) { ++ upcall_data = &args->upcall_data; ++ switch (upcall_data->event_type) { ++ case GF_UPCALL_CACHE_INVALIDATION: ++ dict = ((struct gf_upcall_cache_invalidation *)(upcall_data ++ ->data)) ++ ->dict; ++ break; ++ case GF_UPCALL_RECALL_LEASE: ++ dict = ((struct gf_upcall_recall_lease *)(upcall_data->data)) ++ ->dict; ++ break; ++ } ++ if (dict) ++ dict_unref(dict); ++ ++ GF_FREE(upcall_data->client_uid); ++ GF_FREE(upcall_data->data); ++ } + GF_FREE(args); + return 0; + } +@@ -5727,14 +5747,7 @@ glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque) + { + struct upcall_syncop_args *args = opaque; + +- /* Here we not using upcall_syncop_args_free as application +- * will be cleaning up the args->up_arg using glfs_free +- * post processing upcall. 
+- */ +- if (ret) { +- upcall_syncop_args_free(args); +- } else +- GF_FREE(args); ++ (void)upcall_syncop_args_free(args); + + return 0; + } +@@ -5743,29 +5756,17 @@ static int + glfs_cbk_upcall_syncop(void *opaque) + { + struct upcall_syncop_args *args = opaque; ++ struct gf_upcall *upcall_data = NULL; + struct glfs_upcall *up_arg = NULL; + struct glfs *fs; ++ int ret = -1; + + fs = args->fs; +- up_arg = args->up_arg; +- +- if (fs->up_cbk && up_arg) { +- (fs->up_cbk)(up_arg, fs->up_data); +- return 0; +- } +- +- return -1; +-} ++ upcall_data = &args->upcall_data; + +-static struct upcall_syncop_args * +-upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data) +-{ +- struct upcall_syncop_args *args = NULL; +- int ret = -1; +- struct glfs_upcall *up_arg = NULL; +- +- if (!fs || !upcall_data) ++ if (!upcall_data) { + goto out; ++ } + + up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall, + glfs_mt_upcall_entry_t); +@@ -5795,6 +5796,8 @@ upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data) + if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) { + gf_msg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_INVALID_ENTRY, + "Upcall_EVENT_NULL received. Skipping it."); ++ ret = 0; ++ GLFS_FREE(up_arg); + goto out; + } else if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, +@@ -5802,6 +5805,85 @@ upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data) + goto out; + } + ++ if (fs->up_cbk && up_arg) ++ (fs->up_cbk)(up_arg, fs->up_data); ++ ++ /* application takes care of calling glfs_free on up_arg post ++ * their processing */ ++ ++out: ++ return ret; ++} ++ ++static struct gf_upcall_cache_invalidation * ++gf_copy_cache_invalidation(struct gf_upcall_cache_invalidation *src) ++{ ++ struct gf_upcall_cache_invalidation *dst = NULL; ++ ++ if (!src) ++ goto out; ++ ++ dst = GF_CALLOC(1, sizeof(struct gf_upcall_cache_invalidation), ++ glfs_mt_upcall_entry_t); ++ ++ if (!dst) { ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, ++ "Upcall entry allocation failed."); ++ goto out; ++ } ++ ++ dst->flags = src->flags; ++ dst->expire_time_attr = src->expire_time_attr; ++ dst->stat = src->stat; ++ dst->p_stat = src->p_stat; ++ dst->oldp_stat = src->oldp_stat; ++ ++ if (src->dict) ++ dst->dict = dict_copy_with_ref(src->dict, NULL); ++ ++ return dst; ++out: ++ return NULL; ++} ++ ++static struct gf_upcall_recall_lease * ++gf_copy_recall_lease(struct gf_upcall_recall_lease *src) ++{ ++ struct gf_upcall_recall_lease *dst = NULL; ++ ++ if (!src) ++ goto out; ++ ++ dst = GF_CALLOC(1, sizeof(struct gf_upcall_recall_lease), ++ glfs_mt_upcall_entry_t); ++ ++ if (!dst) { ++ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, ++ "Upcall entry allocation failed."); ++ goto out; ++ } ++ ++ dst->lease_type = src->lease_type; ++ memcpy(dst->tid, src->tid, 16); ++ ++ if (src->dict) ++ dst->dict = dict_copy_with_ref(src->dict, NULL); ++ ++ return dst; ++out: ++ return NULL; ++} ++ ++static struct upcall_syncop_args * ++upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data) ++{ ++ struct upcall_syncop_args *args = NULL; ++ int ret = -1; ++ struct gf_upcall *t_data = NULL; ++ ++ if (!fs || !upcall_data) ++ goto out; ++ + args = GF_CALLOC(1, sizeof(struct upcall_syncop_args), + glfs_mt_upcall_entry_t); + if (!args) { +@@ -5819,15 +5901,31 @@ upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data) + * notification without taking any lock/ref. 
+ */ + args->fs = fs; +- args->up_arg = up_arg; ++ t_data = &(args->upcall_data); ++ t_data->client_uid = gf_strdup(upcall_data->client_uid); + +- /* application takes care of calling glfs_free on up_arg post +- * their processing */ ++ gf_uuid_copy(t_data->gfid, upcall_data->gfid); ++ t_data->event_type = upcall_data->event_type; ++ ++ switch (t_data->event_type) { ++ case GF_UPCALL_CACHE_INVALIDATION: ++ t_data->data = gf_copy_cache_invalidation( ++ (struct gf_upcall_cache_invalidation *)upcall_data->data); ++ break; ++ case GF_UPCALL_RECALL_LEASE: ++ t_data->data = gf_copy_recall_lease( ++ (struct gf_upcall_recall_lease *)upcall_data->data); ++ break; ++ } ++ ++ if (!t_data->data) ++ goto out; + + return args; + out: +- if (up_arg) { +- GLFS_FREE(up_arg); ++ if (ret) { ++ GF_FREE(args->upcall_data.client_uid); ++ GF_FREE(args); + } + + return NULL; +-- +1.8.3.1 + diff --git a/SOURCES/0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch b/SOURCES/0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch new file mode 100644 index 0000000..33663f0 --- /dev/null +++ b/SOURCES/0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch @@ -0,0 +1,47 @@ +From 7455900798446681fea1a2693fac9b423ad9722a Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Tue, 9 Jul 2019 11:06:49 +0200 +Subject: [PATCH 265/265] fuse: add missing GF_FREE to fuse_interrupt + +Upstream: +(Reviewed on https://review.gluster.org/c/glusterfs/+/23016) +> Change-Id: Id7e003e4a53d0a0057c1c84e1cd704c80a6cb015 +> Fixes: bz#1728047 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1734423 +Change-Id: I50640bf9b56349ab9b07140bdce8a45a7d07ba7a +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177298 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index c05866b..1c946a2 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -661,7 +661,7 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg, + " failed to allocate timed message", + finh->unique, fii->unique); + +- return; ++ goto out; + } + + dmsg->fuse_out_header.unique = finh->unique; +@@ -673,6 +673,9 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg, + + send_fuse_timed(this, dmsg); + } ++ ++out: ++ GF_FREE(finh); + } + + /* +-- +1.8.3.1 + diff --git a/SOURCES/0266-geo-rep-Fix-mount-broker-setup-issue.patch b/SOURCES/0266-geo-rep-Fix-mount-broker-setup-issue.patch new file mode 100644 index 0000000..bc4f84e --- /dev/null +++ b/SOURCES/0266-geo-rep-Fix-mount-broker-setup-issue.patch @@ -0,0 +1,63 @@ +From cb9d0fa4bd2664556f0564406037f9fb7fb781a6 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Wed, 31 Jul 2019 15:40:55 +0530 +Subject: [PATCH 266/276] geo-rep: Fix mount broker setup issue + +Even the use builtin 'type' command as in patch [1] +causes issues if argument in question is not part of PATH +environment variable for that user. This patch fixes the +same by doing source /etc/profile. This was already being +used in another part of script. 
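+
+To make the failure mode concrete, here is a minimal C illustration (not
+part of this fix, whose actual change is in gverify.sh): any PATH-dependent
+lookup, like `type -p gluster`, fails in a non-login shell session unless
+the binary's directory happens to be on that session's PATH. Sourcing
+/etc/profile populates PATH before the lookup runs.
+
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(void)
+{
+    /* Simulate the minimal PATH a non-interactive ssh session may get;
+     * /usr/sbin (where gluster commonly lives) is absent. */
+    setenv("PATH", "/usr/bin", 1);
+    /* execlp() searches PATH, just like `type -p` does. */
+    execlp("gluster", "gluster", "--version", (char *)NULL);
+    perror("execlp(gluster)"); /* ENOENT when gluster lives outside PATH */
+    return EXIT_FAILURE;
+}
+```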
+ +[1] https://review.gluster.org/23089 + +Backport of: + > Patch: https://review.gluster.org/23136 + > Change-Id: Iceb78835967ec6a4350983eec9af28398410c002 + > fixes: bz#1734738 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: Iceb78835967ec6a4350983eec9af28398410c002 +BUG: 1734734 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177867 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/src/gverify.sh | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh +index 692c1d6..f5f70d2 100755 +--- a/geo-replication/src/gverify.sh ++++ b/geo-replication/src/gverify.sh +@@ -180,6 +180,8 @@ function main() + > $log_file + + inet6=$7 ++ local cmd_line ++ local ver + + # Use FORCE_BLOCKER flag in the error message to differentiate + # between the errors which the force command should bypass +@@ -206,13 +208,14 @@ function main() + exit 1; + fi; + ++ cmd_line=$(cmd_slave); + if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then +- ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster" ++ ver=$(ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'") + else +- ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "type -p gluster" ++ ver=$(ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'") + fi + +- if [ $? -ne 0 ]; then ++ if [ -z "$ver" ]; then + echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file + exit 1; + fi; +-- +1.8.3.1 + diff --git a/SOURCES/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch b/SOURCES/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch new file mode 100644 index 0000000..0be69e8 --- /dev/null +++ b/SOURCES/0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch @@ -0,0 +1,143 @@ +From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 29 Jul 2019 16:22:10 +0530 +Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal + +Problem: +Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup +if it's not present. In a multi client scenario, there is a race +which results in updating the ctime xattr to older value. + +e.g. Let c1 and c2 be two clients and file1 be the file which +doesn't have the ctime xattr. Let the ctime of file1 be t1. +(from backend, ctime heals time attributes from backend when not present). + +Now following operations are done on mount +c1 -> ls -l /mnt/file1 | c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1; + +The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup, +so both of them tries to heal ctime to time 't1'. If c2 wins the race and +appends the file before c1 heals it, it sets the time to 't1' and updates +it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1' +which is incorrect. + +Solution: +Compare the times during heal and only update the larger time. This is the +general approach used in ctime feature but got missed with healing legacy +files. 
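+
+The "only update the larger time" rule can be sketched as follows (helper
+names are illustrative; the real patch applies this to posix_mdata_t fields
+under the inode lock, via posix_compare_timespec):
+
+```c
+#include <time.h>
+
+/* Return >0 if a is later than b, <0 if earlier, 0 if equal. */
+static int
+timespec_cmp(const struct timespec *a, const struct timespec *b)
+{
+    if (a->tv_sec != b->tv_sec)
+        return (a->tv_sec > b->tv_sec) ? 1 : -1;
+    if (a->tv_nsec != b->tv_nsec)
+        return (a->tv_nsec > b->tv_nsec) ? 1 : -1;
+    return 0;
+}
+
+/* Heal must never move a time attribute backwards: adopt the incoming
+ * value only when it is newer than what is already stored. */
+static void
+merge_time(struct timespec *stored, const struct timespec *incoming)
+{
+    if (timespec_cmp(incoming, stored) > 0)
+        *stored = *incoming;
+}
+```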
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/ + +>fixes: bz#1734299 +>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7 +>Signed-off-by: Kotresh HR <khiremat@redhat.com> + +BUG: 1734305 +Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177866 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++------- + 1 file changed, 58 insertions(+), 18 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 647c0bb..57791fa 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + struct mdata_iatt *mdata_iatt, int *op_errno) + { + posix_mdata_t *mdata = NULL; ++ posix_mdata_t imdata = { ++ 0, ++ }; + int ret = 0; ++ gf_boolean_t mdata_already_set = _gf_false; + + GF_VALIDATE_OR_GOTO("posix", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + LOCK(&inode->lock); + { +- mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); +- if (!mdata) { +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, +- "Could not allocate mdata. gfid: %s", +- uuid_utoa(inode->gfid)); +- ret = -1; +- *op_errno = ENOMEM; +- goto unlock; +- } ++ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); ++ if (ret == 0 && mdata) { ++ mdata_already_set = _gf_true; ++ } else if (ret == -1 || !mdata) { ++ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); ++ if (!mdata) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, ++ "Could not allocate mdata. gfid: %s", ++ uuid_utoa(inode->gfid)); ++ ret = -1; ++ *op_errno = ENOMEM; ++ goto unlock; ++ } ++ ++ ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata, ++ op_errno); ++ if (ret == 0) { ++ /* Got mdata from disk. This is a race, another client ++ * has healed the xattr during lookup. 
So set it in inode ++ * ctx */ ++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ mdata_already_set = _gf_true; ++ } else { ++ *op_errno = 0; ++ mdata->version = 1; ++ mdata->flags = 0; ++ mdata->ctime.tv_sec = mdata_iatt->ia_ctime; ++ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; ++ mdata->atime.tv_sec = mdata_iatt->ia_atime; ++ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec; ++ mdata->mtime.tv_sec = mdata_iatt->ia_mtime; ++ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + +- mdata->version = 1; +- mdata->flags = 0; +- mdata->ctime.tv_sec = mdata_iatt->ia_ctime; +- mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; +- mdata->atime.tv_sec = mdata_iatt->ia_atime; +- mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec; +- mdata->mtime.tv_sec = mdata_iatt->ia_mtime; +- mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; ++ __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ } ++ } + +- __inode_ctx_set1(inode, this, (uint64_t *)&mdata); ++ if (mdata_already_set) { ++ /* Compare and update the larger time */ ++ imdata.ctime.tv_sec = mdata_iatt->ia_ctime; ++ imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; ++ imdata.atime.tv_sec = mdata_iatt->ia_atime; ++ imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec; ++ imdata.mtime.tv_sec = mdata_iatt->ia_mtime; ++ imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; ++ ++ if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) { ++ mdata->ctime = imdata.ctime; ++ } ++ if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) { ++ mdata->mtime = imdata.mtime; ++ } ++ if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) { ++ mdata->atime = imdata.atime; ++ } ++ } + + ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0268-rpc-transport-have-default-listen-port.patch b/SOURCES/0268-rpc-transport-have-default-listen-port.patch new file mode 100644 index 0000000..176a907 --- /dev/null +++ b/SOURCES/0268-rpc-transport-have-default-listen-port.patch @@ -0,0 +1,46 @@ +From 872e344c0ab40c37b1872c32f5d5fddc097a1460 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Mon, 5 Aug 2019 21:16:35 +0530 +Subject: [PATCH 268/276] rpc/transport: have default listen-port + +With release-6, we now can have transport.socket.listen-port parameter +configurable in glusterd.vol. However the default value wasn't defined +in the code and this breaks the backward compatibility where if one has +a modified glusterd.vol file, then post upgrade the same file will be +retained and the new changes introduced as part of the release wouldn't +be available in the glusterd.vol. So it's important that for each new +options introduced in glusterd.vol file backward compatibility is +guaranteed. 
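+
+The fallback pattern amounts to the sketch below (the default value is an
+assumption here; 24007 is glusterd's conventional listen port, and the real
+code uses the GF_DEFAULT_SOCKET_LISTEN_PORT macro): a key missing from an
+old glusterd.vol must resolve to the compiled-in default, not to 0.
+
+```c
+#include <stdint.h>
+#include <stdlib.h>
+
+#define DEFAULT_LISTEN_PORT 24007
+
+static uint16_t
+resolve_listen_port(const char *configured) /* NULL when the key is absent */
+{
+    if (configured != NULL)
+        return (uint16_t)atoi(configured);
+    return DEFAULT_LISTEN_PORT; /* keeps pre-upgrade vol files working */
+}
+```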
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23160/ + +>Fixes: bz#1737676 +>Change-Id: I776b28bff786320cda299fe673d824024dc9803e +>Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1734534 +Change-Id: I776b28bff786320cda299fe673d824024dc9803e +Signed-off-by: Atin Mukherjee <amukherj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177862 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/name.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c +index ca14402..7f18cc4 100644 +--- a/rpc/rpc-transport/socket/src/name.c ++++ b/rpc/rpc-transport/socket/src/name.c +@@ -367,6 +367,8 @@ af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr, + listen_port_data = dict_get(options, "transport.socket.listen-port"); + if (listen_port_data) { + listen_port = data_to_uint16(listen_port_data); ++ } else { ++ listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT; + } + + listen_host_data = dict_get(options, "transport.socket.bind-address"); +-- +1.8.3.1 + diff --git a/SOURCES/0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch b/SOURCES/0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch new file mode 100644 index 0000000..f2cb5f2 --- /dev/null +++ b/SOURCES/0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch @@ -0,0 +1,58 @@ +From 7c2d6e82d7d3430ad8a557b6ae726765f7e874e9 Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Fri, 12 Apr 2019 11:35:55 +0800 +Subject: [PATCH 269/276] ec: fix truncate lock to cover the write in tuncate + clean + +ec_truncate_clean does writing under the lock granted for truncate, +but the lock is calculated by ec_adjust_offset_up, so that, +the write in ec_truncate_clean is out of lock. 
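+
+A worked example of the offset adjustment (values assumed for
+illustration): with a 512-byte stripe and a truncate to offset 700, the
+cleanup rewrites the partial stripe starting at 512, so the lock must
+start at the offset rounded down (512), not up (1024), or that write
+falls outside the locked range.
+
+```c
+#include <stdio.h>
+
+static long round_down(long off, long stripe) { return off - (off % stripe); }
+static long round_up(long off, long stripe)   { return ((off + stripe - 1) / stripe) * stripe; }
+
+int main(void)
+{
+    long stripe = 512, user_size = 700;
+    printf("lock start: down=%ld, up=%ld\n",
+           round_down(user_size, stripe), round_up(user_size, stripe));
+    return 0; /* prints: lock start: down=512, up=1024 */
+}
+```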
+ +fixes: bz#1732770 +Upstream-patch: https://review.gluster.org/c/glusterfs/+/22552 +Change-Id: Idbe1fd48d26afe49c36b77db9f12e0907f5a4134 +Signed-off-by: Kinglong Mee <kinglongmee@gmail.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177973 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-inode-write.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index a903664..ea55140 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -1405,6 +1405,7 @@ int32_t + ec_manager_truncate(ec_fop_data_t *fop, int32_t state) + { + ec_cbk_data_t *cbk; ++ off_t offset_down; + + switch (state) { + case EC_STATE_INIT: +@@ -1416,16 +1417,19 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state) + /* Fall through */ + + case EC_STATE_LOCK: ++ offset_down = fop->user_size; ++ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true); ++ + if (fop->id == GF_FOP_TRUNCATE) { + ec_lock_prepare_inode( + fop, &fop->loc[0], + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, +- fop->offset, EC_RANGE_FULL); ++ offset_down, EC_RANGE_FULL); + } else { + ec_lock_prepare_fd( + fop, fop->fd, + EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO, +- fop->offset, EC_RANGE_FULL); ++ offset_down, EC_RANGE_FULL); + } + ec_lock(fop); + +-- +1.8.3.1 + diff --git a/SOURCES/0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch b/SOURCES/0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch new file mode 100644 index 0000000..5015e2c --- /dev/null +++ b/SOURCES/0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch @@ -0,0 +1,42 @@ +From 84d8a0ca5b521b9d87679ffebe420fe69869961d Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Mon, 8 Jul 2019 21:13:28 +0800 +Subject: [PATCH 270/276] cluster/ec: inherit healing from lock when it has + info + +If lock has info, fop should inherit healing mask from it. +Otherwise, fop cannot inherit right healing when changed_flags is zero. + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23010 +Change-Id: Ife80c9169d2c555024347a20300b0583f7e8a87f +fixes: bz#1732792 +Signed-off-by: Kinglong Mee <mijinlong@horiscale.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177974 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index e2e582f..db1ff5b 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -1412,11 +1412,12 @@ ec_get_size_version(ec_lock_link_t *link) + set_dirty = ec_set_dirty_flag(link, ctx, dirty); + + /* If ec metadata has already been retrieved, do not try again. 
*/ +- if (ctx->have_info && (!set_dirty)) { ++ if (ctx->have_info) { + if (ec_is_data_fop(fop->id)) { + fop->healing |= lock->healing; + } +- goto unlock; ++ if (!set_dirty) ++ goto unlock; + } + + /* Determine if there's something we need to retrieve for the current +-- +1.8.3.1 + diff --git a/SOURCES/0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch b/SOURCES/0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch new file mode 100644 index 0000000..26ec3e7 --- /dev/null +++ b/SOURCES/0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch @@ -0,0 +1,116 @@ +From 52d71ad0e5c27808e7d8eea8a0920298837e408c Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Wed, 17 Jul 2019 14:50:22 +0200 +Subject: [PATCH 271/276] cluster/ec: fix EIO error for concurrent writes on + sparse files + +EC doesn't allow concurrent writes on overlapping areas, they are +serialized. However non-overlapping writes are serviced in parallel. +When a write is not aligned, EC first needs to read the entire chunk +from disk, apply the modified fragment and write it again. + +The problem appears on sparse files because a write to an offset +implicitly creates data on offsets below it (so, in some way, they +are overlapping). For example, if a file is empty and we read 10 bytes +from offset 10, read() will return 0 bytes. Now, if we write one byte +at offset 1M and retry the same read, the system call will return 10 +bytes (all containing 0's). + +So if we have two writes, the first one at offset 10 and the second one +at offset 1M, EC will send both in parallel because they do not overlap. +However, the first one will try to read missing data from the first chunk +(i.e. offsets 0 to 9) to recombine the entire chunk and do the final write. +This read will happen in parallel with the write to 1M. What could happen +is that half of the bricks process the write before the read, and the +half do the read before the write. Some bricks will return 10 bytes of +data while the otherw will return 0 bytes (because the file on the brick +has not been expanded yet). + +When EC tries to recombine the answers from the bricks, it can't, because +it needs more than half consistent answers to recover the data. So this +read fails with EIO error. This error is propagated to the parent write, +which is aborted and EIO is returned to the application. + +The issue happened because EC assumed that a write to a given offset +implies that offsets below it exist. + +This fix prevents the read of the chunk from bricks if the current size +of the file is smaller than the read chunk offset. This size is +correctly tracked, so this fixes the issue. + +Also modifying ec-stripe.t file for Test #13 within it. +In this patch, if a file size is less than the offset we are writing, we +fill zeros in head and tail and do not consider it strip cache miss. +That actually make sense as we know what data that part holds and there is +no need of reading it from bricks. 
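+
+The decision the patch adds can be condensed as follows (names are
+illustrative): the head chunk is read-merged only when the file's current
+size extends past the write offset; otherwise that region is a hole, so it
+is zero-filled locally rather than read from the bricks.
+
+```c
+#include <stdbool.h>
+#include <string.h>
+
+static bool
+prepare_head(char *chunk, size_t head_len, long current_size, long write_offset)
+{
+    if (current_size > write_offset)
+        return true;              /* real data exists: merge it via readv */
+    memset(chunk, 0, head_len);   /* hole: contents are zeros by definition */
+    return false;                 /* no brick read, so no racy answers */
+}
+```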
+ +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23066 +Change-Id: Ic342e8c35c555b8534109e9314c9a0710b6225d6 +fixes: bz#1731448 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177975 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/ec/ec-stripe.t | 2 +- + xlators/cluster/ec/src/ec-inode-write.c | 26 +++++++++++++++++--------- + 2 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/tests/basic/ec/ec-stripe.t b/tests/basic/ec/ec-stripe.t +index 1e940eb..98b9229 100644 +--- a/tests/basic/ec/ec-stripe.t ++++ b/tests/basic/ec/ec-stripe.t +@@ -202,7 +202,7 @@ TEST truncate -s 0 $B0/test_file + TEST truncate -s 0 $M0/test_file + TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc + TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc +-check_statedump_md5sum 4 5 ++check_statedump_md5sum 4 4 + clean_file_unmount + + ### 14 - Truncate to invalidate all but one the stripe in cache #### +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index ea55140..a45e6d6 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -2013,20 +2013,28 @@ ec_writev_start(ec_fop_data_t *fop) + if (err != 0) { + goto failed_fd; + } ++ tail = fop->size - fop->user_size - fop->head; + if (fop->head > 0) { +- found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD); +- if (!found_stripe) { +- if (ec_make_internal_fop_xdata(&xdata)) { +- err = -ENOMEM; +- goto failed_xdata; ++ if (current > fop->offset) { ++ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD); ++ if (!found_stripe) { ++ if (ec_make_internal_fop_xdata(&xdata)) { ++ err = -ENOMEM; ++ goto failed_xdata; ++ } ++ ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, ++ ec_writev_merge_head, NULL, fd, ec->stripe_size, ++ fop->offset, 0, xdata); ++ } ++ } else { ++ memset(fop->vector[0].iov_base, 0, fop->head); ++ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail); ++ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) { ++ ec_add_stripe_in_cache(ec, fop); + } +- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, +- ec_writev_merge_head, NULL, fd, ec->stripe_size, +- fop->offset, 0, xdata); + } + } + +- tail = fop->size - fop->user_size - fop->head; + if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) { + /* Current locking scheme will make sure the 'current' below will + * never decrease while the fop is in progress, so the checks will +-- +1.8.3.1 + diff --git a/SOURCES/0272-cluster-ec-Always-read-from-good-mask.patch b/SOURCES/0272-cluster-ec-Always-read-from-good-mask.patch new file mode 100644 index 0000000..a6193e4 --- /dev/null +++ b/SOURCES/0272-cluster-ec-Always-read-from-good-mask.patch @@ -0,0 +1,90 @@ +From 220b95085847b5f6a9e5bee7a9519efe72600e6a Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Thu, 18 Jul 2019 11:25:31 +0530 +Subject: [PATCH 272/276] cluster/ec: Always read from good-mask + +There are cases where fop->mask may have fop->healing added +and readv shouldn't be wound on fop->healing. 
To avoid this +always wind readv to lock->good_mask + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23069 +fixes: bz#1730914 +Change-Id: I2226ef0229daf5ff315d51e868b980ee48060b87 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177976 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 3 +++ + xlators/cluster/ec/src/ec-inode-write.c | 27 ++++++++++++++++++++++----- + 2 files changed, 25 insertions(+), 5 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index db1ff5b..28b31c9 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -654,6 +654,9 @@ ec_child_select(ec_fop_data_t *fop) + * unlock should go on all subvols where lock is performed*/ + if (fop->parent && !ec_internal_op(fop)) { + fop->mask &= (fop->parent->mask & ~fop->parent->healing); ++ if (ec_is_data_fop(fop->id)) { ++ fop->healing |= fop->parent->healing; ++ } + } + + if ((fop->mask & ~ec->xl_up) != 0) { +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index a45e6d6..4f35b6d 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -1977,6 +1977,20 @@ ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which) + return found; + } + ++static uintptr_t ++ec_get_lock_good_mask(inode_t *inode, xlator_t *xl) ++{ ++ ec_lock_t *lock = NULL; ++ ec_inode_t *ictx = NULL; ++ LOCK(&inode->lock); ++ { ++ ictx = __ec_inode_get(inode, xl); ++ lock = ictx->inode_lock; ++ } ++ UNLOCK(&inode->lock); ++ return lock->good_mask; ++} ++ + void + ec_writev_start(ec_fop_data_t *fop) + { +@@ -2022,9 +2036,10 @@ ec_writev_start(ec_fop_data_t *fop) + err = -ENOMEM; + goto failed_xdata; + } +- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, +- ec_writev_merge_head, NULL, fd, ec->stripe_size, +- fop->offset, 0, xdata); ++ ec_readv(fop->frame, fop->xl, ++ ec_get_lock_good_mask(fop->fd->inode, fop->xl), ++ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd, ++ ec->stripe_size, fop->offset, 0, xdata); + } + } else { + memset(fop->vector[0].iov_base, 0, fop->head); +@@ -2047,8 +2062,10 @@ ec_writev_start(ec_fop_data_t *fop) + err = -ENOMEM; + goto failed_xdata; + } +- ec_readv(fop->frame, fop->xl, -1, EC_MINIMUM_MIN, +- ec_writev_merge_tail, NULL, fd, ec->stripe_size, ++ ec_readv(fop->frame, fop->xl, ++ ec_get_lock_good_mask(fop->fd->inode, fop->xl), ++ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd, ++ ec->stripe_size, + fop->offset + fop->size - ec->stripe_size, 0, xdata); + } + } else { +-- +1.8.3.1 + diff --git a/SOURCES/0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch b/SOURCES/0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch new file mode 100644 index 0000000..5c01cb5 --- /dev/null +++ b/SOURCES/0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch @@ -0,0 +1,86 @@ +From d5f931b334ac7abccaf30d277ce3ca9cfae0da5b Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 29 Jul 2019 14:08:37 +0530 +Subject: [PATCH 273/276] cluster/ec: Fix reopen flags to avoid misbehavior + +Problem: +when a file needs to be re-opened O_APPEND and O_EXCL +flags are not filtered in EC. 
+ +- O_APPEND should be filtered because EC doesn't send O_APPEND below EC for +open to make sure writes happen on the individual fragments instead of at the +end of the file. + +- O_EXCL should be filtered because shd could have created the file so even +when file exists open should succeed + +- O_CREAT should be filtered because open happens with gfid as parameter. So +open fop will create just the gfid which will lead to problems. + +Fix: +Filter out these two flags in reopen. + +Upstream-patch:https://review.gluster.org/#/c/glusterfs/+/23121/ +Change-Id: Ia280470fcb5188a09caa07bf665a2a94bce23bc4 +fixes: bz#1735514 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177977 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 4 +++- + xlators/cluster/ec/src/ec-inode-write.c | 7 +++++-- + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 28b31c9..5fb4610 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -101,6 +101,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + { + uintptr_t need_open = 0; + int ret = 0; ++ int32_t flags = 0; + loc_t loc = { + 0, + }; +@@ -121,6 +122,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + goto out; + } + ++ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL)); + if (IA_IFDIR == fop->fd->inode->ia_type) { + ec_opendir(fop->frame, fop->xl, need_open, + EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, +@@ -128,7 +130,7 @@ ec_fix_open(ec_fop_data_t *fop, uintptr_t mask) + } else { + ec_open(fop->frame, fop->xl, need_open, + EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc, +- fop->fd->flags & (~O_TRUNC), fop->fd, NULL); ++ flags, fop->fd, NULL); + } + + out: +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index 4f35b6d..2f28e11 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -1985,10 +1985,13 @@ ec_get_lock_good_mask(inode_t *inode, xlator_t *xl) + LOCK(&inode->lock); + { + ictx = __ec_inode_get(inode, xl); +- lock = ictx->inode_lock; ++ if (ictx) ++ lock = ictx->inode_lock; + } + UNLOCK(&inode->lock); +- return lock->good_mask; ++ if (lock) ++ return lock->good_mask; ++ return 0; + } + + void +-- +1.8.3.1 + diff --git a/SOURCES/0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch b/SOURCES/0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch new file mode 100644 index 0000000..0307e25 --- /dev/null +++ b/SOURCES/0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch @@ -0,0 +1,49 @@ +From 4c2aa7adef3df500043dd45614d20c9987e6c0d9 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Fri, 2 Aug 2019 12:05:09 +0530 +Subject: [PATCH 274/276] cluster/ec: Update lock->good_mask on parent fop + failure + +When discard/truncate performs write fop, it should do so +after updating lock->good_mask to make sure readv happens +on the correct mask + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23147 +fixes: bz#1730914 +Change-Id: Idfef0bbcca8860d53707094722e6ba3f81c583b7 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/177978 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar 
Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.h | 2 ++ + xlators/cluster/ec/src/ec-inode-write.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index e948342..3c69471 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -204,4 +204,6 @@ void + ec_reset_entry_healing(ec_fop_data_t *fop); + char * + ec_msg_str(ec_fop_data_t *fop); ++void ++ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop); + #endif /* __EC_COMMON_H__ */ +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index 2f28e11..8bfa3b4 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -89,6 +89,8 @@ ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size) + goto out; + } + ++ if (fop->locks[0].lock) ++ ec_lock_update_good(fop->locks[0].lock, fop); + vector.iov_base = iobuf->ptr; + vector.iov_len = size; + memset(vector.iov_base, 0, vector.iov_len); +-- +1.8.3.1 + diff --git a/SOURCES/0275-cluster-ec-Create-heal-task-with-heal-process-id.patch b/SOURCES/0275-cluster-ec-Create-heal-task-with-heal-process-id.patch new file mode 100644 index 0000000..ba3d85a --- /dev/null +++ b/SOURCES/0275-cluster-ec-Create-heal-task-with-heal-process-id.patch @@ -0,0 +1,74 @@ +From 0864f1ad12394a5748d92aa0ed5b455135426bc3 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Tue, 30 Jul 2019 10:32:39 +0530 +Subject: [PATCH 275/276] cluster/ec: Create heal task with heal process id + +Problem: +ec_data_undo_pending calls syncop_fxattrop->SYNCOP without +a frame. In this case SYNCOP gets the frame of the task. +However, when we create a synctask for heal we provide +frame as NULL. +Now, if the read-only feature is ON, it will receive the +process ID of the shd as 0 and will consider that it as +not an internal process. This will prevent healing of a +file with "Read-only file system" error message log. + +Solution: +While launching heal, create a synctask using frame and set +process id of the SHD which is -6. 
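+
+In condensed form (struct simplified for illustration; the constant is the
+reserved self-heal daemon pid mentioned above), the frame is tagged so that
+read-only and other internal-client checks let the heal through:
+
+```c
+#define GF_CLIENT_PID_SELF_HEALD (-6)
+
+struct frame_root_sketch { int pid; unsigned uid; unsigned gid; };
+
+static void
+mark_heal_frame(struct frame_root_sketch *root)
+{
+    root->uid = 0;                        /* heal acts as root           */
+    root->gid = 0;
+    root->pid = GF_CLIENT_PID_SELF_HEALD; /* negative pid = internal fop */
+}
+```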
+ +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23129/ + +>Change-Id: I37195399c85de322cbcac75633888922c4e3db4a +>Fixes: bz#1734252 + +BUG: 1733531 +Change-Id: I37195399c85de322cbcac75633888922c4e3db4a +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/178038 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 2fa1f11..0f0f398 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2647,13 +2647,31 @@ void + ec_launch_heal(ec_t *ec, ec_fop_data_t *fop) + { + int ret = 0; ++ call_frame_t *frame = NULL; ++ ++ frame = create_frame(ec->xl, ec->xl->ctx->pool); ++ if (!frame) { ++ goto out; ++ ret = -1; ++ } ++ ++ ec_owner_set(frame, frame->root); ++ /*Do heal as root*/ ++ frame->root->uid = 0; ++ frame->root->gid = 0; ++ /*Mark the fops as internal*/ ++ frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + + ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done, +- NULL, fop); ++ frame, fop); ++out: + if (ret < 0) { + ec_fop_set_error(fop, ENOMEM); + ec_heal_fail(ec, fop); + } ++ ++ if (frame) ++ STACK_DESTROY(frame->root); + } + + void +-- +1.8.3.1 + diff --git a/SOURCES/0276-features-utime-always-update-ctime-at-setattr.patch b/SOURCES/0276-features-utime-always-update-ctime-at-setattr.patch new file mode 100644 index 0000000..f19663b --- /dev/null +++ b/SOURCES/0276-features-utime-always-update-ctime-at-setattr.patch @@ -0,0 +1,74 @@ +From 7f5658a299081cec4c77d3cca4e70099cd59b1fc Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Mon, 5 Aug 2019 11:08:02 +0800 +Subject: [PATCH 276/276] features/utime: always update ctime at setattr + +For the nfs EXCLUSIVE mode create may sets a later time +to mtime (at verifier), it should not set to ctime for +storage.ctime does not allowed set ctime to a earlier time. + + /* Earlier, mdata was updated only if the existing time is less + * than the time to be updated. This would fail the scenarios + * where mtime can be set to any time using the syscall. Hence + * just updating without comparison. But the ctime is not + * allowed to changed to older date. + */ + +According to kernel's setattr, always set ctime at setattr, +and doesnot set ctime from mtime at storage.ctime. 
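+
+The behavioural change reduces to the sketch below (flag value assumed for
+illustration): instead of tagging ctime only for certain valid-attribute
+combinations, every setattr tags it, matching kernel setattr semantics.
+
+```c
+#define MDATA_CTIME 0x1 /* illustrative value */
+
+static unsigned
+utime_setattr_flags(unsigned frame_flags, int valid)
+{
+    (void)valid; /* per-attribute checks no longer gate the ctime update */
+    return frame_flags | MDATA_CTIME;
+}
+```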
+ +>Change-Id: I5cfde6cb7f8939da9617506e3dc80bd840e0d749 +>fixes: bz#1737288 +>Signed-off-by: Kinglong Mee <kinglongmee@gmail.com> +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23154/ + +BUG: 1737705 +Change-Id: I5cfde6cb7f8939da9617506e3dc80bd840e0d749 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/178225 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/features/utime/src/utime-gen-fops-c.py | 13 +------------ + xlators/storage/posix/src/posix-metadata.c | 2 +- + 2 files changed, 2 insertions(+), 13 deletions(-) + +diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py +index 8730a51..a8637ff 100755 +--- a/xlators/features/utime/src/utime-gen-fops-c.py ++++ b/xlators/features/utime/src/utime-gen-fops-c.py +@@ -82,18 +82,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) + { + gl_timespec_get(&frame->root->ctime); +- +- if (!valid) { +- frame->root->flags |= MDATA_CTIME; +- } +- +- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { +- frame->root->flags |= MDATA_CTIME; +- } +- +- if (valid & GF_SET_ATTR_MODE) { +- frame->root->flags |= MDATA_CTIME; +- } ++ frame->root->flags |= MDATA_CTIME; + + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 57791fa..5cbdc98 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -631,7 +631,7 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + tv.tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); + +- flag.ctime = 1; ++ flag.ctime = 0; + flag.mtime = 1; + flag.atime = 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch b/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch new file mode 100644 index 0000000..45dada1 --- /dev/null +++ b/SOURCES/0277-geo-rep-Fix-Config-Get-Race.patch @@ -0,0 +1,109 @@ +From f40570f2f784dc61edb061a4931dcfc16bf51e7e Mon Sep 17 00:00:00 2001 +From: Aravinda VK <avishwan@redhat.com> +Date: Mon, 5 Aug 2019 19:00:21 +0530 +Subject: [PATCH 277/284] geo-rep: Fix Config Get Race + +When two threads(sync jobs) in Geo-rep worker calls `gconf.get` and +`gconf.getr`(realtime) at the sametime, `getr` resets the conf object +and other one gets None. Thread Lock is introduced to fix the issue. 
+ +``` + File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", + line 368, in twrap + tf(*aargs) + File "/usr/libexec/glusterfs/python/syncdaemon/master.py", line 1987, + in syncjob + po = self.sync_engine(pb, self.log_err) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", + line 1444, in rsync + rconf.ssh_ctl_args + \ +AttributeError: 'NoneType' object has no attribute 'split' +``` + +Backport of: + > Patch: https://review.gluster.org/#/c/glusterfs/+/23158/ + > Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5 + > Updates: bz#1737484 + > Signed-off-by: Aravinda VK <avishwan@redhat.com> + +Change-Id: I9c245e5c36338265354e158f5baa32b119eb2da5 +BUG: 1729915 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/178960 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/gsyncdconfig.py | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index 1fc451f..38f3594 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -17,6 +17,7 @@ import os + import shutil + from string import Template + from datetime import datetime ++from threading import Lock + + + # Global object which can be used in other modules +@@ -35,6 +36,7 @@ class GconfInvalidValue(Exception): + class Gconf(object): + def __init__(self, default_conf_file, custom_conf_file=None, + args={}, extra_tmpl_args={}, override_from_args=False): ++ self.lock = Lock() + self.default_conf_file = default_conf_file + self.custom_conf_file = custom_conf_file + self.tmp_conf_file = None +@@ -163,6 +165,11 @@ class Gconf(object): + if value is not None and not self._is_valid_value(name, value): + raise GconfInvalidValue() + ++ ++ def _load_with_lock(self): ++ with self.lock: ++ self._load() ++ + def _load(self): + self.gconf = {} + self.template_conf = [] +@@ -230,12 +237,19 @@ class Gconf(object): + self._tmpl_substitute() + self._do_typecast() + +- def reload(self): ++ def reload(self, with_lock=True): + if self._is_config_changed(): +- self._load() ++ if with_lock: ++ self._load_with_lock() ++ else: ++ self._load() + +- def get(self, name, default_value=None): +- return self.gconf.get(name, default_value) ++ def get(self, name, default_value=None, with_lock=True): ++ if with_lock: ++ with self.lock: ++ return self.gconf.get(name, default_value) ++ else: ++ return self.gconf.get(name, default_value) + + def getall(self, show_defaults=False, show_non_configurable=False): + cnf = {} +@@ -276,8 +290,9 @@ class Gconf(object): + return cnf + + def getr(self, name, default_value=None): +- self.reload() +- return self.get(name, default_value) ++ with self.lock: ++ self.reload(with_lock=False) ++ return self.get(name, default_value, with_lock=False) + + def get_help(self, name=None): + pass +-- +1.8.3.1 + diff --git a/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch b/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch new file mode 100644 index 0000000..00cb48f --- /dev/null +++ b/SOURCES/0278-geo-rep-Fix-worker-connection-issue.patch @@ -0,0 +1,45 @@ +From 924a25990948c9d76001cf4134fc5a2fcbf5c02c Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Fri, 16 Aug 2019 15:38:49 +0530 +Subject: [PATCH 278/284] geo-rep: Fix worker connection issue + +All the workers 
connects to primary slave node. It should +connect to available slave nodes in round robin fashion +and choose different slave node if the corresponding slave +node is down. This patch fixes the same. + +Thanks Aravinda for the help in root causing this. + +Backport of: + > Patch: https://review.gluster.org/23247/ + > Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + > Updates: bz#1737484 + +Change-Id: I9f8e7744f4adb8a24833cf173681d109710f98cb +Signed-off-by: Kotresh HR <khiremat@redhat.com> +BUG: 1729915 +Reviewed-on: https://code.engineering.redhat.com/gerrit/178961 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/subcmds.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py +index 4ece7e0..8de7db2 100644 +--- a/geo-replication/syncdaemon/subcmds.py ++++ b/geo-replication/syncdaemon/subcmds.py +@@ -73,7 +73,8 @@ def subcmd_worker(args): + Popen.init_errhandler() + fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + local = GLUSTER("localhost", args.master) +- slavehost, slavevol = args.slave.split("::") ++ slavevol = args.slave.split("::")[-1] ++ slavehost = args.resource_remote + remote = SSH(slavehost, slavevol) + remote.connect_remote() + local.connect() +-- +1.8.3.1 + diff --git a/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch b/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch new file mode 100644 index 0000000..3bbd56c --- /dev/null +++ b/SOURCES/0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch @@ -0,0 +1,253 @@ +From bf24623765817ede84ea47f3265f5e6c2ae17ee7 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Tue, 16 Jul 2019 20:36:57 +0530 +Subject: [PATCH 279/284] posix: In brick_mux brick is crashed while start/stop + volume in loop + +Problem: In brick_mux environment sometime brick is crashed while + volume stop/start in a loop.Brick is crashed in janitor task + at the time of accessing priv.If posix priv is cleaned up before + call janitor task then janitor task is crashed. 
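+
+A condensed sketch of that crash window (names illustrative): the
+timer-wheel callback can still fire with a pointer to priv after the
+brick's cleanup path has freed it.
+
+```c
+#include <stdlib.h>
+#include <time.h>
+
+struct posix_private_sketch { time_t last_landfill_check; };
+
+static void
+janitor_cb(void *data) /* fires asynchronously from the timer wheel */
+{
+    struct posix_private_sketch *priv = data;
+    priv->last_landfill_check = 0; /* use-after-free if cleanup won the race */
+}
+
+static void
+brick_cleanup(struct posix_private_sketch *priv)
+{
+    free(priv); /* no handshake: janitor_cb may still be queued */
+}
+```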
+ +Solution: To avoid the crash in brick_mux environment introduce a new + flag janitor_task_stop in posix_private and before send CHILD_DOWN event + wait for update the flag by janitor_task_done + +> Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4 +> fixes: bz#1730409 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry picked from commit f138d3fa2237e7fa940ecf17153fd700350c4138) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23060/) + +Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4 +fixex: bz#1729971 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/178934 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 3 +++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++-- + xlators/protocol/server/src/server.c | 6 ++++- + xlators/storage/posix/src/posix-common.c | 40 +++++++++++++++++++++++++++++- + xlators/storage/posix/src/posix-helpers.c | 16 ++++++++++++ + xlators/storage/posix/src/posix.h | 3 +++ + 6 files changed, 69 insertions(+), 4 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index b78daad..da551e9 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -861,6 +861,9 @@ struct _xlator { + + /* Flag to notify got CHILD_DOWN event for detach brick */ + uint32_t notify_down; ++ ++ /* Flag to avoid throw duplicate PARENT_DOWN event */ ++ uint32_t parent_down; + }; + + /* This would be the only structure which needs to be exported by +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2aa975b..812c698 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -4082,8 +4082,9 @@ out: + if (msg[0]) { + gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s", + msg); +- gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s", +- new_brickinfo->hostname, new_brickinfo->path); ++ if (new_brickinfo) ++ gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s", ++ new_brickinfo->hostname, new_brickinfo->path); + } + gf_msg_debug("glusterd", 0, "Returning with %d", ret); + return ret; +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index 6ae63ba..a5f09fe 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -580,6 +580,7 @@ server_graph_janitor_threads(void *data) + gf_boolean_t victim_found = _gf_false; + xlator_list_t **trav_p = NULL; + xlator_t *top = NULL; ++ uint32_t parent_down = 0; + + GF_ASSERT(data); + +@@ -598,7 +599,10 @@ server_graph_janitor_threads(void *data) + victim = (*trav_p)->xlator; + if (victim->cleanup_starting && + strcmp(victim->name, victim_name) == 0) { +- victim_found = _gf_true; ++ parent_down = victim->parent_down; ++ victim->parent_down = 1; ++ if (!parent_down) ++ victim_found = _gf_true; + break; + } + } +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index d738692..69857d9 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -146,10 +146,15 @@ int32_t + posix_notify(xlator_t *this, int32_t event, void *data, ...) 
+ { + xlator_t *victim = data; ++ struct posix_private *priv = this->private; ++ int ret = 0; ++ struct timespec sleep_till = { ++ 0, ++ }; + + switch (event) { + case GF_EVENT_PARENT_UP: { +- /* Tell the parent that posix xlator is up */ ++ /* the parent that posix xlator is up */ + default_notify(this, GF_EVENT_CHILD_UP, data); + } break; + +@@ -158,6 +163,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + break; + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); ++ ++ if (priv->janitor) { ++ pthread_mutex_lock(&priv->janitor_mutex); ++ { ++ priv->janitor_task_stop = _gf_true; ++ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, ++ priv->janitor); ++ if (!ret) { ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ /* Wait to set janitor_task flag to _gf_false by ++ * janitor_task_done */ ++ while (priv->janitor_task_stop) { ++ (void)pthread_cond_timedwait(&priv->janitor_cond, ++ &priv->janitor_mutex, ++ &sleep_till); ++ clock_gettime(CLOCK_REALTIME, &sleep_till); ++ sleep_till.tv_sec += 1; ++ } ++ } ++ } ++ pthread_mutex_unlock(&priv->janitor_mutex); ++ GF_FREE(priv->janitor); ++ } ++ priv->janitor = NULL; + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; + default: +@@ -1008,6 +1038,8 @@ posix_init(xlator_t *this) + + pthread_mutex_init(&_private->fsync_mutex, NULL); + pthread_cond_init(&_private->fsync_cond, NULL); ++ pthread_mutex_init(&_private->janitor_mutex, NULL); ++ pthread_cond_init(&_private->janitor_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) +@@ -1128,6 +1160,7 @@ posix_fini(xlator_t *this) + (void)gf_thread_cleanup_xint(priv->disk_space_check); + priv->disk_space_check = 0; + } ++ + if (priv->janitor) { + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); +@@ -1135,8 +1168,10 @@ posix_fini(xlator_t *this) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, + "Failed to delete janitor timer"); + } ++ GF_FREE(priv->janitor); + priv->janitor = NULL; + } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; +@@ -1148,6 +1183,9 @@ posix_fini(xlator_t *this) + GF_FREE(priv->base_path); + LOCK_DESTROY(&priv->lock); + pthread_mutex_destroy(&priv->fsync_mutex); ++ pthread_cond_destroy(&priv->fsync_cond); ++ pthread_mutex_destroy(&priv->janitor_mutex); ++ pthread_cond_destroy(&priv->janitor_cond); + GF_FREE(priv->hostname); + GF_FREE(priv->trash_path); + GF_FREE(priv); +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 07169b5..ef5bfd5 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1432,12 +1432,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data) + this = data; + priv = this->private; + ++ pthread_mutex_lock(&priv->janitor_mutex); ++ { ++ if (priv->janitor_task_stop) { ++ priv->janitor_task_stop = _gf_false; ++ pthread_cond_signal(&priv->janitor_cond); ++ pthread_mutex_unlock(&priv->janitor_mutex); ++ goto out; ++ } ++ } ++ pthread_mutex_unlock(&priv->janitor_mutex); ++ + LOCK(&priv->lock); + { + __posix_janitor_timer_start(this); + } + UNLOCK(&priv->lock); + ++out: + return 0; + } + +@@ -1456,6 +1468,9 @@ posix_janitor_task(void *data) + old_this = THIS; + THIS = this; + ++ if (!priv) ++ goto out; ++ + time(&now); + if ((now - priv->last_landfill_check) > 
priv->janitor_sleep_duration) { + if (priv->disable_landfill_purge) { +@@ -1475,6 +1490,7 @@ posix_janitor_task(void *data) + + THIS = old_this; + ++out: + return 0; + } + +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index b0935a7..64288a7 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -203,6 +203,8 @@ struct posix_private { + struct list_head fsyncs; + pthread_mutex_t fsync_mutex; + pthread_cond_t fsync_cond; ++ pthread_mutex_t janitor_mutex; ++ pthread_cond_t janitor_cond; + int fsync_queue_count; + + enum { +@@ -257,6 +259,7 @@ struct posix_private { + + gf_boolean_t fips_mode_rchecksum; + gf_boolean_t ctime; ++ gf_boolean_t janitor_task_stop; + }; + + typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch b/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch new file mode 100644 index 0000000..38b4d48 --- /dev/null +++ b/SOURCES/0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch @@ -0,0 +1,153 @@ +From 2d7d9165c6a8619eef553859b4b7136b8e9ccb55 Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Sat, 10 Aug 2019 10:30:26 +0530 +Subject: [PATCH 280/284] performance/md-cache: Do not skip caching of null + character xattr values + +Null character string is a valid xattr value in file system. But for +those xattrs processed by md-cache, it does not update its entries if +value is null('\0'). This results in ENODATA when those xattrs are +queried afterwards via getxattr() causing failures in basic operations +like create, copy etc in a specially configured Samba setup for Mac OS +clients. + +On the other side snapview-server is internally setting empty string("") +as value for xattrs received as part of listxattr() and are not intended +to be cached. Therefore we try to maintain that behaviour using an +additional dictionary key to prevent updation of entries in getxattr() +and fgetxattr() callbacks in md-cache. + +Credits: Poornima G <pgurusid@redhat.com> + +Backport of https://review.gluster.org/c/glusterfs/+/23206 + +Change-Id: I7859cbad0a06ca6d788420c2a495e658699c6ff7 +Fixes: bz#1732376 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/179048 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/md-cache/bug-1726205.t | 22 +++++++++++++++ + .../features/snapview-server/src/snapview-server.c | 12 ++++++++- + xlators/performance/md-cache/src/md-cache.c | 31 +++++++++------------- + 3 files changed, 45 insertions(+), 20 deletions(-) + create mode 100644 tests/bugs/md-cache/bug-1726205.t + +diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t +new file mode 100644 +index 0000000..795130e +--- /dev/null ++++ b/tests/bugs/md-cache/bug-1726205.t +@@ -0,0 +1,22 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd; ++ ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3}; ++ ++TEST $CLI volume start $V0 ++ ++TEST $CLI volume set $V0 group samba ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST touch $M0/file ++TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file" ++TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00" ++ ++cleanup; +diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c +index b4998b8..1d6a5e5 100644 +--- a/xlators/features/snapview-server/src/snapview-server.c ++++ b/xlators/features/snapview-server/src/snapview-server.c +@@ -828,7 +828,8 @@ out: + * back into the dict. But to get the values for those xattrs it has to do the + * getxattr operation on each xattr which might turn out to be a costly + * operation. So for each of the xattrs present in the list, a 0 byte value +- * ("") is set into the dict before unwinding. This can be treated as an ++ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr ++ * value(in a file system) we use an extra key in the same dictionary as an + * indicator to other xlators which want to cache the xattrs (as of now, + * md-cache which caches acl and selinux related xattrs) to not to cache the + * values of the xattrs present in the dict. +@@ -871,6 +872,15 @@ svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size) + list_offset += strlen(keybuffer) + 1; + } /* while (remaining_size > 0) */ + ++ /* Add an additional key to indicate that we don't need to cache these ++ * xattrs(with value "") */ ++ ret = dict_set_str(dict, "glusterfs.skip-cache", ""); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED, ++ "dict set operation for the key glusterfs.skip-cache failed."); ++ goto out; ++ } ++ + ret = 0; + + out: +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index 6e0468f..a6b363f 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -698,25 +698,6 @@ updatefn(dict_t *dict, char *key, data_t *value, void *data) + } + } + +- /* posix xlator as part of listxattr will send both names +- * and values of the xattrs in the dict. But as per man page +- * listxattr is mainly supposed to send names of the all the +- * xattrs. gfapi, as of now will put all the keys it obtained +- * in the dict (sent by posix) into a buffer provided by the +- * caller (thus the values of those xattrs are lost). If some +- * xlator makes gfapi based calls (ex: snapview-server), then +- * it has to unwind the calls by putting those names it got +- * in the buffer again into the dict. But now it would not be +- * having the values for those xattrs. So it might just put +- * a 0 byte value ("") into the dict for each xattr and unwind +- * the call. So the xlators which cache the xattrs (as of now +- * md-cache caches the acl and selinux related xattrs), should +- * not update their cache if the value of a xattr is a 0 byte +- * data (i.e. ""). 
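[To make the new contract concrete: the snapview-server hunk above fills the reply dict with placeholder "" values plus one sentinel key, and the md-cache hunks below drop the whole reply when that sentinel is present. The standalone toy below replays the protocol with a plain key/value array instead of dict_t; the struct kv type and the sample xattr names are invented for the demo, and only the "glusterfs.skip-cache" key comes from the patch.]

#include <stdio.h>
#include <string.h>

/* Toy stand-in for dict_t: a fixed list of key/value string pairs. */
struct kv {
    const char *key;
    const char *value;
};

#define SKIP_CACHE_KEY "glusterfs.skip-cache"

/* Producer side (snapview-server's role): names recovered from
 * listxattr get placeholder "" values, plus the sentinel key. */
static const struct kv reply[] = {
    {"security.selinux", ""},
    {"system.posix_acl_access", ""},
    {SKIP_CACHE_KEY, ""},
};

/* Consumer side (md-cache's role): cache nothing when the sentinel
 * exists, since the "" values are placeholders, not real data. */
static int
should_cache(const struct kv *xattrs, size_t n)
{
    for (size_t i = 0; i < n; i++)
        if (strcmp(xattrs[i].key, SKIP_CACHE_KEY) == 0)
            return 0;
    return 1;
}

int
main(void)
{
    printf("cache this reply? %s\n",
           should_cache(reply, sizeof(reply) / sizeof(reply[0])) ? "yes"
                                                                 : "no");
    return 0;
}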
+- */ +- if (value->len == 1 && value->data[0] == '\0') +- return 0; +- + if (dict_set(u->dict, key, value) < 0) { + u->ret = -1; + return -1; +@@ -2406,6 +2387,12 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + ++ if (dict_get(xattr, "glusterfs.skip-cache")) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, 0, ++ "Skipping xattr update due to empty value"); ++ goto out; ++ } ++ + mdc_inode_xatt_set(this, local->loc.inode, xdata); + + out: +@@ -2488,6 +2475,12 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + ++ if (dict_get(xattr, "glusterfs.skip-cache")) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, 0, ++ "Skipping xattr update due to empty value"); ++ goto out; ++ } ++ + mdc_inode_xatt_set(this, local->fd->inode, xdata); + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch b/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch new file mode 100644 index 0000000..5af12d1 --- /dev/null +++ b/SOURCES/0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch @@ -0,0 +1,105 @@ +From fa3cc9971bf1bf4ea52edfedc0cea67a0d6990d1 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Tue, 20 Aug 2019 15:49:40 +0530 +Subject: [PATCH 281/284] ctime: Fix incorrect realtime passed to + frame->root->ctime + +On systems that don't support "timespec_get"(e.g., centos6), it +was using "clock_gettime" with "CLOCK_MONOTONIC" to get unix epoch +time which is incorrect. This patch introduces "timespec_now_realtime" +which uses "clock_gettime" with "CLOCK_REALTIME" which fixes +the issue. + +Backport of: + > Patch: https://review.gluster.org/23274/ + > Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + > fixes: bz#1743652 + +Change-Id: I57be35ce442d7e05319e82112b687eb4f28d7612 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +BUG: 1743611 +Reviewed-on: https://code.engineering.redhat.com/gerrit/179185 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/timespec.h | 2 ++ + libglusterfs/src/libglusterfs.sym | 1 + + libglusterfs/src/timespec.c | 22 ++++++++++++++++++++++ + xlators/features/utime/src/utime-helpers.c | 2 +- + 4 files changed, 26 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/glusterfs/timespec.h b/libglusterfs/src/glusterfs/timespec.h +index 871871d..bb9ab44 100644 +--- a/libglusterfs/src/glusterfs/timespec.h ++++ b/libglusterfs/src/glusterfs/timespec.h +@@ -21,6 +21,8 @@ + void + timespec_now(struct timespec *ts); + void ++timespec_now_realtime(struct timespec *ts); ++void + timespec_adjust_delta(struct timespec *ts, struct timespec delta); + void + timespec_sub(const struct timespec *begin, const struct timespec *end, +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index b161380..467a1b7 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -1073,6 +1073,7 @@ sys_accept + tbf_init + tbf_throttle + timespec_now ++timespec_now_realtime + timespec_sub + timespec_adjust_delta + timespec_cmp +diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c +index c01527f..d0d5005 100644 +--- a/libglusterfs/src/timespec.c ++++ b/libglusterfs/src/timespec.c +@@ -71,6 +71,28 @@ timespec_now(struct timespec *ts) + } + + void ++timespec_now_realtime(struct timespec *ts) ++{ ++#if defined 
GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \ ++ defined GF_BSD_HOST_OS ++ if (0 == clock_gettime(CLOCK_REALTIME, ts)) { ++ return; ++ } ++#endif ++ ++ /* Fall back to gettimeofday()*/ ++ struct timeval tv = { ++ 0, ++ }; ++ if (0 == gettimeofday(&tv, NULL)) { ++ TIMEVAL_TO_TIMESPEC(&tv, ts); ++ return; ++ } ++ ++ return; ++} ++ ++void + timespec_adjust_delta(struct timespec *ts, struct timespec delta) + { + ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000); +diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c +index 79cc014..29d9ad9 100644 +--- a/xlators/features/utime/src/utime-helpers.c ++++ b/xlators/features/utime/src/utime-helpers.c +@@ -17,7 +17,7 @@ gl_timespec_get(struct timespec *ts) + #ifdef TIME_UTC + timespec_get(ts, TIME_UTC); + #else +- timespec_now(ts); ++ timespec_now_realtime(ts); + #endif + } + +-- +1.8.3.1 + diff --git a/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch b/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch new file mode 100644 index 0000000..37a0f12 --- /dev/null +++ b/SOURCES/0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch @@ -0,0 +1,116 @@ +From 98c9fc8d774ae153ca6b44d3337cf5d9f7a030e2 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Fri, 16 Aug 2019 16:07:03 +0530 +Subject: [PATCH 282/284] geo-rep: Fix the name of changelog archive file + +Background: +The processed changelogs are archived each month in a single tar file. +The default format is "archive_YYYYMM.tar" which is specified as "%%Y%%m" +in configuration file. + +Problem: +The created changelog archive file didn't have corresponding year +and month. It created as "archive_%Y%m.tar" on python2 only systems. + +Cause and Fix: +Geo-rep expects "%Y%m" after the ConfigParser reads it from config file. +Since it was "%%Y%%m" in config file, geo-rep used to get correct value +"%Y%m" in python3 and "%%Y%%m" in python2 which is incorrect. +The fix can be to use "%Y%m" in config file but that fails in python3. +So the fix is to use "RawConfigParser" in geo-rep and use "%Y%m". This +works both in python2 and python3. + +Backport of: + > Patch: https://review.gluster.org/23248 + > Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c + > fixes: bz#1741890 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: Ie5b7d2bc04d0d53cd1769e064c2d67aaf95d557c +BUG: 1743634 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/179188 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/gsyncd.conf.in | 2 +- + geo-replication/syncdaemon/gsyncdconfig.py | 14 +++++++------- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index c2e4f0d..5ebd57a 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -109,7 +109,7 @@ type=int + help=Minimum time interval in seconds for passive worker to become Active + + [changelog-archive-format] +-value=%%Y%%m ++value=%Y%m + help=Processed changelogs will be archived in working directory. 
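[Backing up to the timespec_now_realtime() change above: the bug patch 0281 fixes is that CLOCK_MONOTONIC counts from an unspecified starting point, commonly boot time on Linux, not from the Unix epoch, so stamping ctime with it produced nonsense dates on systems without timespec_get(). A quick standalone check, assuming a Linux-ish libc:]

#include <stdio.h>
#include <time.h>

int
main(void)
{
    struct timespec mono, real;

    clock_gettime(CLOCK_MONOTONIC, &mono); /* since an arbitrary point */
    clock_gettime(CLOCK_REALTIME, &real);  /* since the Unix epoch     */

    printf("CLOCK_MONOTONIC: %lld s\n", (long long)mono.tv_sec);
    printf("CLOCK_REALTIME:  %lld s\n", (long long)real.tv_sec);
    return 0;
}

[On a typical host the first number is small, hours or days of uptime, while the second is well past 1.5 billion, which is why only CLOCK_REALTIME is usable as a file timestamp.]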
Pattern for archive file + + [use-meta-volume] +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index 38f3594..f823311 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -10,9 +10,9 @@ + # + + try: +- from ConfigParser import ConfigParser, NoSectionError ++ from ConfigParser import RawConfigParser, NoSectionError + except ImportError: +- from configparser import ConfigParser, NoSectionError ++ from configparser import RawConfigParser, NoSectionError + import os + import shutil + from string import Template +@@ -94,7 +94,7 @@ class Gconf(object): + if name != "all" and not self._is_configurable(name): + raise GconfNotConfigurable() + +- cnf = ConfigParser() ++ cnf = RawConfigParser() + with open(self.custom_conf_file) as f: + cnf.readfp(f) + +@@ -138,7 +138,7 @@ class Gconf(object): + if curr_val == value: + return True + +- cnf = ConfigParser() ++ cnf = RawConfigParser() + with open(self.custom_conf_file) as f: + cnf.readfp(f) + +@@ -178,7 +178,7 @@ class Gconf(object): + self.session_conf_items = [] + self.default_values = {} + +- conf = ConfigParser() ++ conf = RawConfigParser() + # Default Template config file + with open(self.default_conf_file) as f: + conf.readfp(f) +@@ -342,7 +342,7 @@ class Gconf(object): + return False + + def is_config_file_old(config_file, mastervol, slavevol): +- cnf = ConfigParser() ++ cnf = RawConfigParser() + cnf.read(config_file) + session_section = "peers %s %s" % (mastervol, slavevol) + try: +@@ -357,7 +357,7 @@ def config_upgrade(config_file, ret): + shutil.copyfile(config_file, config_file_backup) + + #write a new config file +- config = ConfigParser() ++ config = RawConfigParser() + config.add_section('vars') + + for key, value in ret.items(): +-- +1.8.3.1 + diff --git a/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch b/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch new file mode 100644 index 0000000..eb9d8f8 --- /dev/null +++ b/SOURCES/0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch @@ -0,0 +1,285 @@ +From 55eb2e7642e3428eaa1b2d833c0daa1d34b98324 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Thu, 8 Aug 2019 10:05:12 +0530 +Subject: [PATCH 283/284] ctime: Fix ctime issue with utime family of syscalls + +When atime|mtime is updated via utime family of syscalls, +ctime is not updated. This patch fixes the same. 
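[For reference, the behaviour this patch restores is the plain POSIX rule for local file systems: utimensat() and friends set atime/mtime to the caller's values but must still move ctime to the current time as a side effect. A small standalone check on Linux; it creates and removes /tmp/ctime-demo, a path chosen only for the demo:]

#include <stdio.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
    struct stat before, after;
    struct timespec times[2] = {{42, 0}, {42, 0}}; /* atime, mtime */
    int fd = open("/tmp/ctime-demo", O_CREAT | O_RDWR, 0644);

    if (fd < 0)
        return 1;
    fstat(fd, &before);
    sleep(1);
    /* utimensat() sets atime/mtime to the explicit values above... */
    utimensat(AT_FDCWD, "/tmp/ctime-demo", times, 0);
    fstat(fd, &after);
    /* ...but POSIX requires ctime to be bumped to "now" regardless. */
    printf("ctime moved: %s\n",
           after.st_ctime != before.st_ctime ? "yes" : "no");
    close(fd);
    unlink("/tmp/ctime-demo");
    return 0;
}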
+ +Backport of: + > Patch: https://review.gluster.org/23177 + > Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25 + > fixes: bz#1738786 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: I7f86d8f8a1e06a332c3449b5bbdbf128c9690f25 +BUG: 1743627 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/179184 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/utime/src/utime-gen-fops-c.py | 13 +++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 8 +-- + xlators/storage/posix/src/posix-metadata.c | 96 ++++++++++++++------------ + xlators/storage/posix/src/posix-metadata.h | 3 +- + 4 files changed, 68 insertions(+), 52 deletions(-) + +diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py +index a8637ff..8730a51 100755 +--- a/xlators/features/utime/src/utime-gen-fops-c.py ++++ b/xlators/features/utime/src/utime-gen-fops-c.py +@@ -82,7 +82,18 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) + { + gl_timespec_get(&frame->root->ctime); +- frame->root->flags |= MDATA_CTIME; ++ ++ if (!valid) { ++ frame->root->flags |= MDATA_CTIME; ++ } ++ ++ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { ++ frame->root->flags |= MDATA_CTIME; ++ } ++ ++ if (valid & GF_SET_ATTR_MODE) { ++ frame->root->flags |= MDATA_CTIME; ++ } + + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index d22bbc2..e0ea85b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -425,8 +425,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + real_path); + goto out; + } +- posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf, +- valid); ++ posix_update_utime_in_mdata(this, real_path, -1, loc->inode, ++ &frame->root->ctime, stbuf, valid); + } + + if (valid & GF_SET_ATTR_CTIME && !priv->ctime) { +@@ -652,8 +652,8 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + fd); + goto out; + } +- posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf, +- valid); ++ posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, ++ &frame->root->ctime, stbuf, valid); + } + + if (!valid) { +diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c +index 5cbdc98..532daa2 100644 +--- a/xlators/storage/posix/src/posix-metadata.c ++++ b/xlators/storage/posix/src/posix-metadata.c +@@ -432,8 +432,10 @@ out: + */ + static int + posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, +- inode_t *inode, struct timespec *time, struct iatt *stbuf, +- posix_mdata_flag_t *flag, gf_boolean_t update_utime) ++ inode_t *inode, struct timespec *time, ++ struct timespec *u_atime, struct timespec *u_mtime, ++ struct iatt *stbuf, posix_mdata_flag_t *flag, ++ gf_boolean_t update_utime) + { + posix_mdata_t *mdata = NULL; + int ret = -1; +@@ -443,6 +445,10 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, time, out); + ++ if (update_utime && (!u_atime || !u_mtime)) { ++ goto out; ++ } ++ + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); +@@ -506,32 +512,30 @@ 
posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, + } + } + +- /* Earlier, mdata was updated only if the existing time is less +- * than the time to be updated. This would fail the scenarios +- * where mtime can be set to any time using the syscall. Hence +- * just updating without comparison. But the ctime is not +- * allowed to changed to older date. +- */ +- +- if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) { +- mdata->ctime = *time; +- } +- + /* In distributed systems, there could be races with fops + * updating mtime/atime which could result in different + * mtime/atime for same file. So this makes sure, only the + * highest time is retained. If the mtime/atime update comes + * from the explicit utime syscall, it is allowed to set to +- * previous time ++ * previous or future time but the ctime is always set to ++ * current time. + */ + if (update_utime) { ++ if (flag->ctime && ++ posix_compare_timespec(time, &mdata->ctime) > 0) { ++ mdata->ctime = *time; ++ } + if (flag->mtime) { +- mdata->mtime = *time; ++ mdata->mtime = *u_mtime; + } + if (flag->atime) { +- mdata->atime = *time; ++ mdata->atime = *u_atime; + } + } else { ++ if (flag->ctime && ++ posix_compare_timespec(time, &mdata->ctime) > 0) { ++ mdata->ctime = *time; ++ } + if (flag->mtime && + posix_compare_timespec(time, &mdata->mtime) > 0) { + mdata->mtime = *time; +@@ -584,15 +588,22 @@ out: + */ + void + posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, +- inode_t *inode, struct iatt *stbuf, int valid) ++ inode_t *inode, struct timespec *ctime, ++ struct iatt *stbuf, int valid) + { + int32_t ret = 0; + #if defined(HAVE_UTIMENSAT) +- struct timespec tv = { ++ struct timespec tv_atime = { ++ 0, ++ }; ++ struct timespec tv_mtime = { + 0, + }; + #else +- struct timeval tv = { ++ struct timeval tv_atime = { ++ 0, ++ }; ++ struct timeval tv_mtime = { + 0, + }; + #endif +@@ -611,35 +622,28 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, + */ + if (inode && priv->ctime) { + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { +- tv.tv_sec = stbuf->ia_atime; +- SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec); ++ tv_atime.tv_sec = stbuf->ia_atime; ++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec); + +- flag.ctime = 0; +- flag.mtime = 0; ++ flag.ctime = 1; + flag.atime = 1; +- ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, +- &flag, _gf_true); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, +- "posix set mdata atime failed on file:" +- " %s gfid:%s", +- real_path, uuid_utoa(inode->gfid)); +- } + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { +- tv.tv_sec = stbuf->ia_mtime; +- SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); ++ tv_mtime.tv_sec = stbuf->ia_mtime; ++ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec); + +- flag.ctime = 0; ++ flag.ctime = 1; + flag.mtime = 1; +- flag.atime = 0; ++ } + +- ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, +- &flag, _gf_true); ++ if (flag.mtime || flag.atime) { ++ ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime, ++ &tv_atime, &tv_mtime, NULL, &flag, ++ _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, +- "posix set mdata mtime failed on file:" ++ "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } +@@ -702,8 +706,8 @@ posix_set_ctime(call_frame_t *frame, 
xlator_t *this, const char *real_path, + goto out; + } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, +- &frame->root->ctime, stbuf, &flag, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf, ++ &flag, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, +@@ -733,8 +737,8 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, + goto out; + } + ret = posix_set_mdata_xattr(this, real_path, fd, inode, +- &frame->root->ctime, stbuf, &flag, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf, ++ &flag, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path, +@@ -792,8 +796,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + flag_dup.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, +- &frame->root->ctime, stbuf_out, &flag_dup, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf_out, ++ &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_out, +@@ -811,8 +815,8 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + flag_dup.ctime = 0; + + ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out, +- &frame->root->ctime, stbuf_out, &flag_dup, +- _gf_false); ++ &frame->root->ctime, NULL, NULL, stbuf_out, ++ &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_in, +diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h +index dc25e59..c176699 100644 +--- a/xlators/storage/posix/src/posix-metadata.h ++++ b/xlators/storage/posix/src/posix-metadata.h +@@ -40,7 +40,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, + inode_t *inode, struct iatt *stbuf); + void + posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, +- inode_t *inode, struct iatt *stbuf, int valid); ++ inode_t *inode, struct timespec *ctime, ++ struct iatt *stbuf, int valid); + void + posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, + int fd, inode_t *inode, struct iatt *stbuf); +-- +1.8.3.1 + diff --git a/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch b/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch new file mode 100644 index 0000000..4078bfc --- /dev/null +++ b/SOURCES/0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch @@ -0,0 +1,61 @@ +From 243075b593c6fccbffb3e82ffcfdb58acfd68269 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Thu, 22 Aug 2019 15:51:43 +0530 +Subject: [PATCH 284/284] posix: log aio_error return codes in + posix_fs_health_check + +Problem: Sometime brick is going down to health check thread is + failed without logging error codes return by aio system calls. + As per aio_error man page it returns a positive error number + if the asynchronous I/O operation failed. 
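[For context, aio_error() returns EINPROGRESS while a request is pending, 0 on success, and the positive errno of a failed request, which is exactly the value worth logging. A minimal sketch of the submit/poll/log cycle; this is standalone demo code, not the glusterfs health-check itself, and may need -lrt on older glibc:]

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
    char buf[] = "health-check";
    struct aiocb cb;
    int rc;
    int fd = open("/tmp/aio-demo", O_CREAT | O_WRONLY | O_TRUNC, 0644);

    if (fd < 0)
        return 1;

    memset(&cb, 0, sizeof(cb));
    cb.aio_fildes = fd;
    cb.aio_buf = buf;
    cb.aio_nbytes = sizeof(buf);

    if (aio_write(&cb) != 0)
        return 1;

    while ((rc = aio_error(&cb)) == EINPROGRESS)
        usleep(1000);

    /* rc is 0 on success or the positive errno of the failed request;
     * this is the code the patch now includes in the warning message. */
    if (rc != 0)
        fprintf(stderr, "aio_write failed: rc=%d (%s)\n", rc, strerror(rc));
    else
        printf("aio_write ok, %zd bytes\n", aio_return(&cb));

    close(fd);
    unlink("/tmp/aio-demo");
    return 0;
}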
+ +Solution: log aio_error return codes in error message + +> Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef +> Fixes: bz#1744519 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23284/ + +Change-Id: I2496b1bc16e602b0fd3ad53e211de11ec8c641ef +BUG: 1744518 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/179211 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-helpers.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index ef5bfd5..d143d4c 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2025,7 +2025,6 @@ posix_fs_health_check(xlator_t *this) + if (ret != 0) { + op_errno = errno; + op = "aio_write_error"; +- ret = -1; + goto out; + } + +@@ -2064,7 +2063,6 @@ posix_fs_health_check(xlator_t *this) + if (ret != 0) { + op_errno = errno; + op = "aio_read_error"; +- ret = -1; + goto out; + } + +@@ -2089,7 +2087,8 @@ out: + } + if (ret && file_path[0]) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED, +- "%s() on %s returned", op, file_path); ++ "%s() on %s returned ret is %d error is %s", op, file_path, ret, ++ ret != -1 ? strerror(ret) : strerror(op_errno)); + gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, + "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, + file_path, strerror(op_errno), priv->hostname, priv->base_path, +-- +1.8.3.1 + diff --git a/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch b/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch new file mode 100644 index 0000000..12549e7 --- /dev/null +++ b/SOURCES/0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch @@ -0,0 +1,43 @@ +From 49cd9ef7487ba88796315b897823837a9cbd535e Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Wed, 28 Aug 2019 09:05:20 +0530 +Subject: [PATCH 285/297] glusterd: glusterd service is getting timed out on + scaled setup + +Problem: On a three node cluster with 2000 replica volumes systemctl is getting + timed out for glusted service. + +Solution: Configure TimeoutSec 300 to wait for glusterd startup. 
+ +> Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d +> Fixes: bz#1746228 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry picked from commit c90dc63ec9eee0f43ba8e489876fdf8b8810bbdc) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23316/) + +Change-Id: Idb3f3f3e56e6216a0ebd754cbb9e8e37ce9e636d +BUG: 1746027 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/179806 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/systemd/glusterd.service.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in +index c33351c..f604160 100644 +--- a/extras/systemd/glusterd.service.in ++++ b/extras/systemd/glusterd.service.in +@@ -13,6 +13,7 @@ Environment="LOG_LEVEL=INFO" + EnvironmentFile=-@sysconfdir@/sysconfig/glusterd + ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS + KillMode=process ++TimeoutSec=300 + SuccessExitStatus=15 + + [Install] +-- +1.8.3.1 + diff --git a/SOURCES/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch b/SOURCES/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch new file mode 100644 index 0000000..415a07b --- /dev/null +++ b/SOURCES/0286-glusterfs.spec.in-added-script-files-for-machine-com.patch @@ -0,0 +1,162 @@ +From 2a905a8ae6b4737e84543ad76b55f3346fa0f32c Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Tue, 27 Aug 2019 14:12:31 +0530 +Subject: [PATCH 286/297] glusterfs.spec.in: added script files for machine / + component stats + +Have added the file (extras/identify-hangs.sh) to the code base. +And included the following to be packaged: + +Quota Accounting issue: +extras/quota/xattr_analysis.py (made available only on server) +extras/quota/quota_fsck.py (made available only on server) +extras/quota/log_accounting.sh + +Debugging Statedumps: +extras/identify-hangs.sh + +Performance: +extras/collect-system-stats.sh + +Note: rest of the files were already included. + +Label: DOWNSTREAM ONLY. 
+ +Change-Id: I2efb959865c3f381166c6a25c6eef613d13dd5ee +fixes: bz#1719171 +Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/179515 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + extras/Makefile.am | 9 +++++++- + extras/identify-hangs.sh | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ + glusterfs.spec.in | 8 ++++++++ + 3 files changed, 69 insertions(+), 1 deletion(-) + create mode 100644 extras/identify-hangs.sh + +diff --git a/extras/Makefile.am b/extras/Makefile.am +index 983f014..8cbfda1 100644 +--- a/extras/Makefile.am ++++ b/extras/Makefile.am +@@ -30,9 +30,14 @@ endif + + scriptsdir = $(datadir)/glusterfs/scripts + scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh ++scripts_SCRIPTS += quota/log_accounting.sh ++scripts_SCRIPTS += collect-system-stats.sh ++scripts_SCRIPTS += identify-hangs.sh + if WITH_SERVER + scripts_SCRIPTS += post-upgrade-script-for-quota.sh \ + pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh ++scripts_SCRIPTS += quota/quota_fsck.py ++scripts_SCRIPTS += quota/xattr_analysis.py + if USE_SYSTEMD + scripts_SCRIPTS += control-cpu-load.sh + scripts_SCRIPTS += control-mem.sh +@@ -50,7 +55,9 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.co + command-completion/Makefile command-completion/README \ + stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \ + control-cpu-load.sh control-mem.sh group-distributed-virt \ +- thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh ++ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh \ ++ quota/xattr_analysis.py quota/quota_fsck.py quota/log_accounting.sh \ ++ collect-system-stats.sh identify-hangs.sh + + if WITH_SERVER + install-data-local: +diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh +new file mode 100644 +index 0000000..ebc6bf1 +--- /dev/null ++++ b/extras/identify-hangs.sh +@@ -0,0 +1,53 @@ ++#!/bin/bash ++function get_statedump_fnames_without_timestamps ++{ ++ ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u ++} ++ ++function get_non_uniq_fields ++{ ++ local statedump_fname_prefix=$1 ++ print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}' ++} ++ ++function print_stack_lkowner_unique_in_one_line ++{ ++ local statedump_fname_prefix=$1 ++ sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - - ++} ++ ++function get_stacks_that_appear_in_multiple_statedumps ++{ ++ #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps ++ #print the stack ++ local statedump_fname_prefix=$1 ++ while read -r non_uniq_stack; ++ do ++ if [ -z "$printed" ]; ++ then ++ printed="1" ++ fi ++ echo "$statedump_fname_prefix" "$non_uniq_stack" ++ done < <(get_non_uniq_fields "$statedump_fname_prefix") ++} ++ ++statedumpdir=${1} ++if [ -z "$statedumpdir" ]; ++then ++ echo "Usage: $0 <statedump-dir>" ++ exit 1 ++fi ++ ++if [ ! 
-d "$statedumpdir" ]; ++then ++ echo "$statedumpdir: Is not a directory" ++ echo "Usage: $0 <statedump-dir>" ++ exit 1 ++fi ++ ++cd "$statedumpdir" || exit 1 ++for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps); ++do ++ get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix" ++done | column -t ++echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 00603ec..3c2e2dc 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1107,6 +1107,9 @@ exit 0 + %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh + %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh + %endif ++%{_datadir}/glusterfs/scripts/identify-hangs.sh ++%{_datadir}/glusterfs/scripts/collect-system-stats.sh ++%{_datadir}/glusterfs/scripts/log_accounting.sh + # xlators that are needed on the client- and on the server-side + %dir %{_libdir}/glusterfs + %dir %{_libdir}/glusterfs/%{version}%{?prereltag} +@@ -1352,6 +1355,8 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %files server + %doc extras/clear_xattrs.sh ++%{_datadir}/glusterfs/scripts/xattr_analysis.py* ++%{_datadir}/glusterfs/scripts/quota_fsck.py* + # sysconf + %config(noreplace) %{_sysconfdir}/glusterfs + %exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol +@@ -1942,6 +1947,9 @@ fi + %endif + + %changelog ++* Tue Aug 27 2019 Hari Gowtham <hgowtham@redhat.com> ++- Added scripts to collect machine stats and component stats (#1719171) ++ + * Tue Jun 18 2019 Jiffin Tony Thottan <jthottan@redhat.com> + - build glusterfs-ganesha for rhel 7 and above (#1720551) + +-- +1.8.3.1 + diff --git a/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch b/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch new file mode 100644 index 0000000..93bd3c9 --- /dev/null +++ b/SOURCES/0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch @@ -0,0 +1,372 @@ +From 546f412c155dd5aca2b3cd4202f80c9977b215dc Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 4 Sep 2019 12:06:34 +0530 +Subject: [PATCH 287/297] cluster/ec: Fail fsync/flush for files on update + size/version failure + +Problem: +If update size/version is not successful on the file, updates on the +same stripe could lead to data corruptions if the earlier un-aligned +write is not successful on all the bricks. Application won't have +any knowledge of this because update size/version happens in the +background. + +Fix: +Fail fsync/flush on fds that are opened before update-size-version +went bad. 
+ +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23355 +fixes: bz#1745107 +Change-Id: I9d323eddcda703bd27d55f340c4079d76e06e492 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/180672 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/ec/ec-badfd.c | 124 +++++++++++++++++++++++++++++++++++ + tests/basic/ec/ec-badfd.t | 26 ++++++++ + xlators/cluster/ec/src/ec-common.c | 23 +++++++ + xlators/cluster/ec/src/ec-generic.c | 47 +++++++++++++ + xlators/cluster/ec/src/ec-helpers.c | 7 ++ + xlators/cluster/ec/src/ec-messages.h | 2 +- + xlators/cluster/ec/src/ec-types.h | 2 + + 7 files changed, 230 insertions(+), 1 deletion(-) + create mode 100644 tests/basic/ec/ec-badfd.c + create mode 100755 tests/basic/ec/ec-badfd.t + +diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c +new file mode 100644 +index 0000000..8be23c1 +--- /dev/null ++++ b/tests/basic/ec/ec-badfd.c +@@ -0,0 +1,124 @@ ++#include <stdio.h> ++#include <fcntl.h> ++#include <unistd.h> ++#include <time.h> ++#include <limits.h> ++#include <string.h> ++#include <stdlib.h> ++#include <errno.h> ++#include <glusterfs/api/glfs.h> ++#include <glusterfs/api/glfs-handles.h> ++ ++int ++fill_iov(struct iovec *iov, char fillchar, int count) ++{ ++ int ret = -1; ++ ++ iov->iov_base = malloc(count + 1); ++ if (iov->iov_base == NULL) { ++ return ret; ++ } else { ++ iov->iov_len = count; ++ ret = 0; ++ } ++ memset(iov->iov_base, fillchar, count); ++ memset(iov->iov_base + count, '\0', 1); ++ ++ return ret; ++} ++ ++int ++write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count) ++{ ++ ssize_t ret = -1; ++ int flags = O_RDWR; ++ struct iovec iov = {0}; ++ ++ ret = fill_iov(&iov, 'a', char_count); ++ if (ret) { ++ fprintf(stderr, "failed to create iov"); ++ goto out; ++ } ++ ++ ret = glfs_pwritev(glfd, &iov, 1, 0, flags); ++out: ++ if (ret < 0) { ++ fprintf(stderr, "glfs_pwritev failed, %d", errno); ++ } ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ glfs_fd_t *fd = NULL; ++ int ret = 1; ++ char volume_cmd[4096] = {0}; ++ ++ if (argc != 4) { ++ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]); ++ return 1; ++ } ++ ++ fs = glfs_new(argv[2]); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_set_logging: returned %d\n", ret); ++ goto out; ++ } ++ ret = glfs_init(fs); ++ if (ret != 0) { ++ fprintf(stderr, "glfs_init: returned %d\n", ret); ++ goto out; ++ } ++ ++ fd = glfs_open(fs, argv[3], O_RDWR); ++ if (fd == NULL) { ++ fprintf(stderr, "glfs_open: returned NULL\n"); ++ goto out; ++ } ++ ++ ret = write_sync(fs, fd, 16); ++ if (ret < 0) { ++ fprintf(stderr, "write_sync failed\n"); ++ } ++ ++ snprintf(volume_cmd, sizeof(volume_cmd), ++ "gluster --mode=script volume stop %s", argv[2]); ++ /*Stop the volume so that update-size-version fails*/ ++ system(volume_cmd); ++ sleep(8); /* 3 seconds more than eager-lock-timeout*/ ++ snprintf(volume_cmd, sizeof(volume_cmd), ++ "gluster --mode=script volume start %s", argv[2]); ++ system(volume_cmd); ++ sleep(8); /*wait for bricks to come up*/ ++ ret = 
glfs_fsync(fd, NULL, NULL); ++ if (ret == 0) { ++ fprintf(stderr, "fsync succeeded on a BADFD\n"); ++ exit(1); ++ } ++ ++ ret = glfs_close(fd); ++ if (ret == 0) { ++ fprintf(stderr, "flush succeeded on a BADFD\n"); ++ exit(1); ++ } ++ ret = 0; ++ ++out: ++ unlink("/tmp/ec-badfd.log"); ++ glfs_fini(fs); ++ ++ return ret; ++} +diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t +new file mode 100755 +index 0000000..56feb47 +--- /dev/null ++++ b/tests/basic/ec/ec-badfd.t +@@ -0,0 +1,26 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++ ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6} ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5 ++ ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++TEST $GFS -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++TEST touch $M0/file ++ ++TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2 ++TEST $(dirname $0)/ec-badfd $H0 $V0 /file ++cleanup_tester $(dirname ${0})/ec-badfd ++ ++cleanup; +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 5fb4610..92d4e5d 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2255,6 +2255,23 @@ ec_unlock_lock(ec_lock_link_t *link) + } + } + ++void ++ec_inode_bad_inc(inode_t *inode, xlator_t *xl) ++{ ++ ec_inode_t *ctx = NULL; ++ ++ LOCK(&inode->lock); ++ { ++ ctx = __ec_inode_get(inode, xl); ++ if (ctx == NULL) { ++ goto unlock; ++ } ++ ctx->bad_version++; ++ } ++unlock: ++ UNLOCK(&inode->lock); ++} ++ + int32_t + ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, +@@ -2270,6 +2287,12 @@ ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this, + ctx = lock->ctx; + + if (op_ret < 0) { ++ if (link->lock->fd == NULL) { ++ ec_inode_bad_inc(link->lock->loc.inode, this); ++ } else { ++ ec_inode_bad_inc(link->lock->fd->inode, this); ++ } ++ + gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno, + EC_MSG_SIZE_VERS_UPDATE_FAIL, + "Failed to update version and size. 
%s", ec_msg_str(fop)); +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index acc16b5..b019050 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -150,6 +150,37 @@ ec_manager_flush(ec_fop_data_t *fop, int32_t state) + } + } + ++static int32_t ++ec_validate_fd(fd_t *fd, xlator_t *xl) ++{ ++ uint64_t iversion = 0; ++ uint64_t fversion = 0; ++ ec_inode_t *inode_ctx = NULL; ++ ec_fd_t *fd_ctx = NULL; ++ ++ LOCK(&fd->lock); ++ { ++ fd_ctx = __ec_fd_get(fd, xl); ++ if (fd_ctx) { ++ fversion = fd_ctx->bad_version; ++ } ++ } ++ UNLOCK(&fd->lock); ++ ++ LOCK(&fd->inode->lock); ++ { ++ inode_ctx = __ec_inode_get(fd->inode, xl); ++ if (inode_ctx) { ++ iversion = inode_ctx->bad_version; ++ } ++ } ++ UNLOCK(&fd->inode->lock); ++ if (fversion < iversion) { ++ return EBADF; ++ } ++ return 0; ++} ++ + void + ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd, +@@ -165,6 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } ++ + fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, + ec_wind_flush, ec_manager_flush, callback, data); + if (fop == NULL) { +@@ -381,6 +420,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], ++ fd->inode ? 
uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } ++ + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, + ec_wind_fsync, ec_manager_fsync, callback, data); + if (fop == NULL) { +diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c +index 43f6e3b..baac001 100644 +--- a/xlators/cluster/ec/src/ec-helpers.c ++++ b/xlators/cluster/ec/src/ec-helpers.c +@@ -753,6 +753,7 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) + { + int i = 0; + ec_fd_t *ctx = NULL; ++ ec_inode_t *ictx = NULL; + uint64_t value = 0; + ec_t *ec = xl->private; + +@@ -775,6 +776,12 @@ __ec_fd_get(fd_t *fd, xlator_t *xl) + GF_FREE(ctx); + return NULL; + } ++ /* Only refering bad-version so no need for lock ++ * */ ++ ictx = __ec_inode_get(fd->inode, xl); ++ if (ictx) { ++ ctx->bad_version = ictx->bad_version; ++ } + } + } else { + ctx = (ec_fd_t *)(uintptr_t)value; +diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h +index 7c28808..be86b37 100644 +--- a/xlators/cluster/ec/src/ec-messages.h ++++ b/xlators/cluster/ec/src/ec-messages.h +@@ -55,6 +55,6 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL, + EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE, + EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED, + EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED, +- EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED); ++ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED, EC_MSG_FD_BAD); + + #endif /* !_EC_MESSAGES_H_ */ +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 1c295c0..f27f2ec 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -150,6 +150,7 @@ struct _ec_fd { + loc_t loc; + uintptr_t open; + int32_t flags; ++ uint64_t bad_version; + ec_fd_status_t fd_status[0]; + }; + +@@ -180,6 +181,7 @@ struct _ec_inode { + uint64_t dirty[2]; + struct list_head heal; + ec_stripe_list_t stripe_cache; ++ uint64_t bad_version; + }; + + typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, +-- +1.8.3.1 + diff --git a/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch b/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch new file mode 100644 index 0000000..8dd3fca --- /dev/null +++ b/SOURCES/0288-cluster-ec-Fix-coverity-issues.patch @@ -0,0 +1,77 @@ +From ccf7775760dd923e21341438725946737eb8d8af Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Sat, 7 Sep 2019 20:18:01 +0530 +Subject: [PATCH 288/297] cluster/ec: Fix coverity issues + +Fixed the following coverity issue in both flush/fsync +>>> CID 1404964: Null pointer dereferences (REVERSE_INULL) +>>> Null-checking "fd" suggests that it may be null, but it has already +been dereferenced on all paths leading to the check. 
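[For readers unfamiliar with this checker: REVERSE_INULL fires when a pointer is dereferenced first and NULL-checked afterwards, so one of the two must be wrong. A hypothetical minimal reproducer and its fix; struct conn is invented for the demo:]

struct conn {
    int fd;
};

/* The shape Coverity reports: c is dereferenced before the NULL test,
 * so either the dereference can crash or the test is dead code. */
int
fd_of_bad(struct conn *c)
{
    int fd = c->fd; /* dereference first...     */
    if (c == NULL)  /* ...NULL check afterwards */
        return -1;
    return fd;
}

/* The fix, as in the patch: dereference only once the pointer is known
 * to be non-NULL, keeping the check and the use consistent. */
int
fd_of_good(struct conn *c)
{
    if (c == NULL)
        return -1;
    return c->fd;
}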
+>>> if (fd != NULL) { +>>> fop->fd = fd_ref(fd); +>>> if (fop->fd == NULL) { +>>> gf_msg(this->name, GF_LOG_ERROR, 0, +>>> "Failed to reference a " +>>> "file descriptor."); + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23382 +fixes: bz#1745107 +Change-Id: I19c05d585e23f8fbfbc195d1f3775ec528eed671 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/180673 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-generic.c | 28 ++++++++++++++++------------ + 1 file changed, 16 insertions(+), 12 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c +index b019050..192bb02 100644 +--- a/xlators/cluster/ec/src/ec-generic.c ++++ b/xlators/cluster/ec/src/ec-generic.c +@@ -196,12 +196,14 @@ ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- error = ec_validate_fd(fd, this); +- if (error) { +- gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, +- "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], +- fd->inode ? uuid_utoa(fd->inode->gfid) : ""); +- goto out; ++ if (fd) { ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH], ++ fd->inode ? uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } + } + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags, +@@ -420,12 +422,14 @@ ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target, + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + +- error = ec_validate_fd(fd, this); +- if (error) { +- gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, +- "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], +- fd->inode ? uuid_utoa(fd->inode->gfid) : ""); +- goto out; ++ if (fd) { ++ error = ec_validate_fd(fd, this); ++ if (error) { ++ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD, ++ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC], ++ fd->inode ? 
uuid_utoa(fd->inode->gfid) : ""); ++ goto out; ++ } + } + + fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags, +-- +1.8.3.1 + diff --git a/SOURCES/0289-cluster-ec-quorum-count-implementation.patch b/SOURCES/0289-cluster-ec-quorum-count-implementation.patch new file mode 100644 index 0000000..6d24813 --- /dev/null +++ b/SOURCES/0289-cluster-ec-quorum-count-implementation.patch @@ -0,0 +1,721 @@ +From 0d54bb417e982a100ceefb5eab2a61a17e840f39 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Thu, 5 Sep 2019 16:12:39 +0530 +Subject: [PATCH 289/297] cluster/ec: quorum-count implementation + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23366 +upstream-issue: #721 +fixes: bz#1748688 +Change-Id: I5333540e3c635ccf441cf1f4696e4c8986e38ea8 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/180674 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + tests/basic/ec/ec-quorum-count-partial-failure.t | 50 +++++++ + tests/basic/ec/ec-quorum-count.t | 165 +++++++++++++++++++++++ + tests/ec.rc | 9 ++ + xlators/cluster/ec/src/ec-common.c | 13 ++ + xlators/cluster/ec/src/ec-common.h | 24 ++++ + xlators/cluster/ec/src/ec-dir-write.c | 57 ++++---- + xlators/cluster/ec/src/ec-inode-write.c | 61 ++++----- + xlators/cluster/ec/src/ec-types.h | 1 + + xlators/cluster/ec/src/ec.c | 13 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 46 +++++++ + 11 files changed, 383 insertions(+), 60 deletions(-) + create mode 100755 tests/basic/ec/ec-quorum-count-partial-failure.t + create mode 100644 tests/basic/ec/ec-quorum-count.t + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 55476f6..bdc8b3d 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -136,6 +136,8 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + ++#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/basic/ec/ec-quorum-count-partial-failure.t +new file mode 100755 +index 0000000..79f5825 +--- /dev/null ++++ b/tests/basic/ec/ec-quorum-count-partial-failure.t +@@ -0,0 +1,50 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++#This test checks that partial failure of fop results in main fop failure only ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5} ++TEST $CLI volume set $V0 performance.flush-behind off ++TEST $CLI volume start $V0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1 ++TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1 ++TEST cp $M0/b $M0/c ++TEST fallocate -p -l 101 $M0/c ++TEST $CLI volume stop $V0 ++TEST $CLI volume set $V0 debug.delay-gen posix; ++TEST $CLI volume set $V0 delay-gen.delay-duration 10000000; ++TEST $CLI volume set $V0 delay-gen.enable WRITE; ++TEST $CLI volume set $V0 delay-gen.delay-percentage 100 ++TEST $CLI volume set $V0 disperse.quorum-count 6 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}') ++truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure ++fallocate -p -l 101 $M0/b & ++sleep 1 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST wait ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++EXPECT "12345" stat --format=%s $M0/a ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0; ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 ++cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}') ++TEST [[ $cksum == $cksum_after_heal ]] ++cksum=$(dd if=$M0/c | md5sum | awk '{print $1}') ++cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}') ++TEST [[ $cksum == $cksum_after_heal ]] ++ ++cleanup; +diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t +new file mode 100644 +index 0000000..56b5329 +--- /dev/null ++++ b/tests/basic/ec/ec-quorum-count.t +@@ -0,0 +1,165 @@ ++ #!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../ec.rc ++ ++cleanup ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5} ++TEST $CLI volume set $V0 disperse.eager-lock-timeout 5 ++TEST $CLI volume set $V0 performance.flush-behind off ++ ++#Should fail on non-disperse volume ++TEST ! $CLI volume set $V1 disperse.quorum-count 5 ++ ++#Should succeed on a valid range ++TEST ! $CLI volume set $V0 disperse.quorum-count 0 ++TEST ! $CLI volume set $V0 disperse.quorum-count -0 ++TEST ! $CLI volume set $V0 disperse.quorum-count abc ++TEST ! $CLI volume set $V0 disperse.quorum-count 10abc ++TEST ! $CLI volume set $V0 disperse.quorum-count 1 ++TEST ! $CLI volume set $V0 disperse.quorum-count 2 ++TEST ! 
$CLI volume set $V0 disperse.quorum-count 3 ++TEST $CLI volume set $V0 disperse.quorum-count 4 ++TEST $CLI volume start $V0 ++TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++#Test that the option is reflected in the mount ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count ++TEST $CLI volume reset $V0 disperse.quorum-count ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count ++TEST $CLI volume set $V0 disperse.quorum-count 6 ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count ++ ++TEST touch $M0/a ++TEST touch $M0/data ++TEST setfattr -n trusted.def -v def $M0/a ++TEST touch $M0/src ++TEST touch $M0/del-me ++TEST mkdir $M0/dir1 ++TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct ++TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct ++TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file ++#modify operations should fail as the file is not in quorum ++TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++#Read should succeed even when quorum-count is not met ++TEST dd if=$M0/read-file of=/dev/null iflag=direct ++TEST ! touch $M0/a2 ++TEST ! mkdir $M0/dir2 ++TEST ! mknod $M0/b2 b 4 5 ++TEST ! ln -s $M0/a $M0/symlink ++TEST ! ln $M0/a $M0/link ++TEST ! mv $M0/src $M0/dst ++TEST ! rm -f $M0/del-me ++TEST ! rmdir $M0/dir1 ++TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc ++TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc ++TEST ! truncate -s 0 $M0/a ++TEST ! setfattr -n trusted.abc -v abc $M0/a ++TEST ! setfattr -x trusted.def $M0/a ++TEST ! chmod +x $M0/a ++TEST ! fallocate -l 2m -n $M0/a ++TEST ! fallocate -p -l 512k $M0/a ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++ ++# reset the option and check whether the default redundancy count is ++# accepted or not. ++TEST $CLI volume reset $V0 disperse.quorum-count ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count ++TEST touch $M0/a1 ++TEST touch $M0/data1 ++TEST setfattr -n trusted.def -v def $M0/a1 ++TEST touch $M0/src1 ++TEST touch $M0/del-me1 ++TEST mkdir $M0/dir11 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST touch $M0/a21 ++TEST mkdir $M0/dir21 ++TEST mknod $M0/b21 b 4 5 ++TEST ln -s $M0/a1 $M0/symlink1 ++TEST ln $M0/a1 $M0/link1 ++TEST mv $M0/src1 $M0/dst1 ++TEST rm -f $M0/del-me1 ++TEST rmdir $M0/dir11 ++TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc ++TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc ++TEST truncate -s 0 $M0/a1 ++TEST setfattr -n trusted.abc -v abc $M0/a1 ++TEST setfattr -x trusted.def $M0/a1 ++TEST chmod +x $M0/a1 ++TEST fallocate -l 2m -n $M0/a1 ++TEST fallocate -p -l 512k $M0/a1 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++ ++TEST touch $M0/a2 ++TEST touch $M0/data2 ++TEST setfattr -n trusted.def -v def $M0/a1 ++TEST touch $M0/src2 ++TEST touch $M0/del-me2 ++TEST mkdir $M0/dir12 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++TEST ! touch $M0/a22 ++TEST ! mkdir $M0/dir22 ++TEST ! mknod $M0/b22 b 4 5 ++TEST ! ln -s $M0/a2 $M0/symlink2 ++TEST ! ln $M0/a2 $M0/link2 ++TEST ! mv $M0/src2 $M0/dst2 ++TEST ! rm -f $M0/del-me2 ++TEST ! rmdir $M0/dir12 ++TEST ! 
dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc ++TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc ++TEST ! truncate -s 0 $M0/a2 ++TEST ! setfattr -n trusted.abc -v abc $M0/a2 ++TEST ! setfattr -x trusted.def $M0/a2 ++TEST ! chmod +x $M0/a2 ++TEST ! fallocate -l 2m -n $M0/a2 ++TEST ! fallocate -p -l 512k $M0/a2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++ ++# Set quorum-count to 5 and kill 1 brick and the fops should pass ++TEST $CLI volume set $V0 disperse.quorum-count 5 ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count ++TEST touch $M0/a3 ++TEST touch $M0/data3 ++TEST setfattr -n trusted.def -v def $M0/a3 ++TEST touch $M0/src3 ++TEST touch $M0/del-me3 ++TEST mkdir $M0/dir13 ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST touch $M0/a31 ++TEST mkdir $M0/dir31 ++TEST mknod $M0/b31 b 4 5 ++TEST ln -s $M0/a3 $M0/symlink3 ++TEST ln $M0/a3 $M0/link3 ++TEST mv $M0/src3 $M0/dst3 ++TEST rm -f $M0/del-me3 ++TEST rmdir $M0/dir13 ++TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc ++TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc ++TEST truncate -s 0 $M0/a3 ++TEST setfattr -n trusted.abc -v abc $M0/a3 ++TEST setfattr -x trusted.def $M0/a3 ++TEST chmod +x $M0/a3 ++TEST fallocate -l 2m -n $M0/a3 ++TEST fallocate -p -l 512k $M0/a3 ++TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct ++cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')" ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0} ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++TEST kill_brick $V0 $H0 $B0/${V0}5 ++cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}') ++TEST [[ $cksum_before_heal == $cksum_after_heal ]] ++cleanup; +diff --git a/tests/ec.rc b/tests/ec.rc +index 04405ec..f18752f 100644 +--- a/tests/ec.rc ++++ b/tests/ec.rc +@@ -7,3 +7,12 @@ function ec_up_status() + local ec_id=$3 + grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'=' + } ++ ++function ec_option_value() ++{ ++ local v=$1 ++ local m=$2 ++ local ec_id=$3 ++ local opt=$4 ++ grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}' ++} +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 92d4e5d..2e59180 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -707,6 +707,19 @@ ec_child_select(ec_fop_data_t *fop) + return 0; + } + ++ if (!fop->parent && fop->lock_count && ++ (fop->locks[0].update[EC_DATA_TXN] || ++ fop->locks[0].update[EC_METADATA_TXN])) { ++ if (ec->quorum_count && (num < ec->quorum_count)) { ++ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT, ++ "Insufficient available children " ++ "for this request (have %d, need " ++ "%d). %s", ++ num, ec->quorum_count, ec_msg_str(fop)); ++ return 0; ++ } ++ } ++ + return 1; + } + +diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h +index 3c69471..eab86ee 100644 +--- a/xlators/cluster/ec/src/ec-common.h ++++ b/xlators/cluster/ec/src/ec-common.h +@@ -26,6 +26,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t; + + #define EC_FLAG_LOCK_SHARED 0x0001 + ++#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) 
\ ++ do { \ ++ ec_t *__ec = fop->xl->private; \ ++ int32_t __op_ret = 0; \ ++ int32_t __op_errno = 0; \ ++ int32_t __success_count = gf_bits_count(fop->good); \ ++ \ ++ __op_ret = op_ret; \ ++ __op_errno = op_errno; \ ++ if (!fop->parent && frame && \ ++ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \ ++ __ec->quorum_count && (__success_count < __ec->quorum_count) && \ ++ op_ret >= 0) { \ ++ __op_ret = -1; \ ++ __op_errno = EIO; \ ++ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \ ++ EC_MSG_CHILDS_INSUFFICIENT, \ ++ "Insufficient available children for this request " \ ++ "(have %d, need %d). %s", \ ++ __success_count, __ec->quorum_count, ec_msg_str(fop)); \ ++ } \ ++ fn(frame, cookie, this, __op_ret, __op_errno, params); \ ++ } while (0) ++ + enum _ec_xattrop_flags { + EC_FLAG_XATTROP, + EC_FLAG_DATA_DIRTY, +diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c +index 0b8ee21..8192462 100644 +--- a/xlators/cluster/ec/src/ec-dir-write.c ++++ b/xlators/cluster/ec/src/ec-dir-write.c +@@ -218,10 +218,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.create != NULL) { +- fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->fd, fop->loc[0].inode, +- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->fd, ++ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1], ++ &cbk->iatt[2], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -390,9 +390,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.link != NULL) { +- fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], +- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); ++ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -569,9 +570,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.mkdir != NULL) { +- fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], +- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); ++ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -773,9 +775,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.mknod != NULL) { +- fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0], +- &cbk->iatt[1], &cbk->iatt[2], cbk->xdata); ++ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -931,10 +934,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.rename != NULL) { +- fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], 
&cbk->iatt[2], &cbk->iatt[3], ++ &cbk->iatt[4], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1083,9 +1086,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.rmdir != NULL) { +- fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1237,10 +1240,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.symlink != NULL) { +- fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, fop->loc[0].inode, +- &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, fop->loc[0].inode, ++ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2], ++ cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1392,9 +1395,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.unlink != NULL) { +- fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c +index 8bfa3b4..2dbb4db 100644 +--- a/xlators/cluster/ec/src/ec-inode-write.c ++++ b/xlators/cluster/ec/src/ec-inode-write.c +@@ -185,26 +185,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + switch (fop->id) { + case GF_FOP_SETXATTR: + if (fop->cbks.setxattr) { +- fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret, ++ op_errno, xdata); + } + break; + case GF_FOP_REMOVEXATTR: + if (fop->cbks.removexattr) { +- fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this, ++ op_ret, op_errno, xdata); + } + break; + case GF_FOP_FSETXATTR: + if (fop->cbks.fsetxattr) { +- fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this, ++ op_ret, op_errno, xdata); + } + break; + case GF_FOP_FREMOVEXATTR: + if (fop->cbks.fremovexattr) { +- fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno, +- xdata); ++ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this, ++ op_ret, op_errno, xdata); + } + break; + } +@@ -494,16 +494,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state) + + if (fop->id == GF_FOP_SETATTR) { + if (fop->cbks.setattr != NULL) { +- fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], +- &cbk->iatt[1], cbk->xdata); ++ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } else { + if (fop->cbks.fsetattr != NULL) { +- fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, +- cbk->op_ret, cbk->op_errno, +- &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } + 
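The hunks above and below reroute every mutating callback through QUORUM_CBK. Its effect is simple to state: a fop that the bricks reported as successful is rewritten to fail with EIO when fewer than quorum-count bricks answered, unless it is an internal fop (fop->parent is set) or was issued by the self-heal daemon. A minimal standalone sketch of that decision follows; the function name and simplified signature are illustrative only and are not part of the patch:

    #include <errno.h>
    #include <stdint.h>

    struct quorum_result {
        int32_t op_ret;
        int32_t op_errno;
    };

    /* Sketch of the check QUORUM_CBK wraps around each callback. */
    static struct quorum_result
    apply_write_quorum(int32_t op_ret, int32_t op_errno,
                       uint32_t quorum_count, /* disperse.quorum-count; 0 disables */
                       int32_t success_count, /* bricks that completed the fop */
                       int is_internal_fop,   /* fop->parent != NULL */
                       int is_shd_frame)      /* frame from the self-heal daemon */
    {
        struct quorum_result res = {op_ret, op_errno};

        /* Only top-level application fops are policed; heal traffic must
         * still be able to write below quorum to repair the volume. */
        if (!is_internal_fop && !is_shd_frame && quorum_count != 0 &&
            success_count < (int32_t)quorum_count && op_ret >= 0) {
            res.op_ret = -1;
            res.op_errno = EIO;
        }
        return res;
    }

Reads are deliberately not policed, which is why the test script above expects dd from read-file to succeed below quorum while every mutating fop fails.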
+@@ -994,9 +993,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.fallocate != NULL) { +- fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1247,9 +1246,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.discard != NULL) { +- fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +@@ -1477,17 +1476,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state) + + if (fop->id == GF_FOP_TRUNCATE) { + if (fop->cbks.truncate != NULL) { +- fop->cbks.truncate(fop->req_frame, fop, fop->xl, +- cbk->op_ret, cbk->op_errno, +- &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } else { + if (fop->cbks.ftruncate != NULL) { +- fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, +- cbk->op_ret, cbk->op_errno, +- &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop, ++ fop->xl, cbk->op_ret, cbk->op_errno, ++ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata); + } + } + +@@ -2245,9 +2242,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state) + GF_ASSERT(cbk != NULL); + + if (fop->cbks.writev != NULL) { +- fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret, +- cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1], +- cbk->xdata); ++ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl, ++ cbk->op_ret, cbk->op_errno, &cbk->iatt[0], ++ &cbk->iatt[1], cbk->xdata); + } + + return EC_STATE_LOCK_REUSE; +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f27f2ec..ea4f6ad 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -654,6 +654,7 @@ struct _ec { + gf_boolean_t optimistic_changelog; + gf_boolean_t parallel_writes; + uint32_t stripe_cache; ++ uint32_t quorum_count; + uint32_t background_heals; + uint32_t heal_wait_qlen; + uint32_t self_heal_window_size; /* max size of read/writes */ +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 3c8013e..19094c4 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options) + GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool, + failed); + GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed); ++ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed); + ret = 0; + if (ec_assign_read_policy(ec, read_policy)) { + ret = -1; +@@ -720,6 +721,7 @@ init(xlator_t *this) + failed); + GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed); + GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed); ++ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); + + this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); + if (!this->itable) +@@ -1402,6 +1404,7 @@ ec_dump_private(xlator_t *this) + 
gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters); + gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]); + gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes); ++ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count); + + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache", + this->type, this->name); +@@ -1672,6 +1675,16 @@ struct volume_options options[] = { + "lead to extra memory consumption, maximum " + "(cache size * stripe size) Bytes per open file."}, + { ++ .key = {"quorum-count"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "0", ++ .description = ++ "This option can be used to define how many successes on" ++ "the bricks constitute a success to the application. This" ++ " count should be in the range" ++ "[disperse-data-count, disperse-count] (inclusive)", ++ }, ++ { + .key = {NULL}, + }, + }; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 8ce338e..7ca47a6 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -1128,6 +1128,42 @@ out: + } + + static int ++validate_disperse_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict, ++ char *key, char *value, char **op_errstr) ++{ ++ int ret = -1; ++ int quorum_count = 0; ++ int data_count = 0; ++ ++ ret = gf_string2int(value, &quorum_count); ++ if (ret) { ++ gf_asprintf(op_errstr, ++ "%s is not an integer. %s expects a " ++ "valid integer value.", ++ value, key); ++ goto out; ++ } ++ ++ if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) { ++ gf_asprintf(op_errstr, "Cannot set %s for a non-disperse volume.", key); ++ ret = -1; ++ goto out; ++ } ++ ++ data_count = volinfo->disperse_count - volinfo->redundancy_count; ++ if (quorum_count < data_count || quorum_count > volinfo->disperse_count) { ++ gf_asprintf(op_errstr, "%d for %s is out of range [%d - %d]", ++ quorum_count, key, data_count, volinfo->disperse_count); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int + validate_parallel_readdir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, + char *value, char **op_errstr) + { +@@ -3663,6 +3699,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .type = NO_DOC, + .op_version = GD_OP_VERSION_3_13_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, ++ {.key = "disperse.quorum-count", ++ .voltype = "cluster/disperse", ++ .type = NO_DOC, ++ .op_version = GD_OP_VERSION_8_0, ++ .validate_fn = validate_disperse_quorum_count, ++ .description = "This option can be used to define how many successes on" ++ "the bricks constitute a success to the application. This" ++ " count should be in the range" ++ "[disperse-data-count, disperse-count] (inclusive)", ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, + { + .key = "features.sdfs", + .voltype = "features/sdfs", +-- +1.8.3.1 + diff --git a/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch b/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch new file mode 100644 index 0000000..01ea8c2 --- /dev/null +++ b/SOURCES/0290-glusterd-tag-disperse.quorum-count-for-31306.patch @@ -0,0 +1,84 @@ +From 312da653ac80b537af06139f8d83a63180c72461 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Tue, 10 Sep 2019 14:04:17 +0530 +Subject: [PATCH 290/297] glusterd: tag disperse.quorum-count for 31306 + +In upstream disperse.quorum-count is makred for release-8 +latest new op-version is 31306. 
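The value 31306 follows the encoding that the GD_OP_VERSION_* constants in the hunk below make visible: major * 10000 + minor * 100 + update, with the minor and update fields two digits wide. A throwaway sketch of that mapping; the helper is illustrative and exists in no GlusterFS header:

    #include <assert.h>

    static int
    op_version(int major, int minor, int update)
    {
        return major * 10000 + minor * 100 + update;
    }

    int
    main(void)
    {
        assert(op_version(3, 13, 6) == 31306); /* GD_OP_VERSION_3_13_6 */
        assert(op_version(3, 13, 5) == 31305); /* GD_OP_VERSION_3_13_5 */
        assert(op_version(7, 0, 0) == 70000);  /* GD_OP_VERSION_7_0 */
        return 0;
    }

Retagging the option from the absent 8.0 op-version down to 31306 is what lets clusters at that op-version actually set disperse.quorum-count.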
+ +Label: DOWNSTREAM ONLY + +fixes: bz#1748688 +Change-Id: I88fdbd56ce3b8475b5ec670659adaa9d11c01d97 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/180675 +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 12 ++++++------ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 2 +- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index bdc8b3d..e218285 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,19 +50,19 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_8_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ + versions */ + +-#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */ ++#define GD_OP_VERSION_RHS_3_0 30000 /* Op-Version of RHS 3.0 */ + + #define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_RHS_3_0 + +-#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */ ++#define GD_OP_VERSION_RHS_2_1_5 20105 /* RHS 2.1 update 5 */ + +-#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */ ++#define GD_OP_VERSION_RHS_3_0_4 30004 /* Op-Version of RHS 3.0.4 */ + + #define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */ + +@@ -124,6 +124,8 @@ + + #define GD_OP_VERSION_3_13_5 31305 /* Op-version for GlusterFS 3.13.5 */ + ++#define GD_OP_VERSION_3_13_6 31306 /* Op-version for GlusterFS 3.13.6 */ ++ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ + + #define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */ +@@ -136,8 +138,6 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + +-#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */ +- + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 7ca47a6..16601a2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3702,7 +3702,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "disperse.quorum-count", + .voltype = "cluster/disperse", + .type = NO_DOC, +- .op_version = GD_OP_VERSION_8_0, ++ .op_version = GD_OP_VERSION_3_13_6, + .validate_fn = validate_disperse_quorum_count, + .description = "This option can be used to define how many successes on" + "the bricks constitute a success to the application. 
This" +-- +1.8.3.1 + diff --git a/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch b/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch new file mode 100644 index 0000000..efdbc23 --- /dev/null +++ b/SOURCES/0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch @@ -0,0 +1,106 @@ +From 87d8070f80487322a1736846a78725fd88f8de34 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Tue, 20 Aug 2019 13:27:24 +0530 +Subject: [PATCH 291/297] cluster/ec: Mark release only when it is acquired + +Problem: +Mount-1 Mount-2 +1)Tries to acquire lock on 'dir1' 1)Tries to acquire lock on 'dir1' +2)Lock is granted on brick-0 2)Lock gets EAGAIN on brick-0 and + leads to blocking lock on brick-0 +3)Gets a lock-contention 3) Doesn't matter what happens on mount-2 + notification, marks lock->release from here on. + to true. +4)New fop comes on 'dir1' which will + be put in frozen list as lock->release + is set to true. +5) Lock acquisition from step-2 fails because +3 bricks went down in 4+2 setup. + +Fop on mount-1 which is put in frozen list will hang because no codepath will +move it from frozen list to any other list and the lock will not be retried. + +Fix: +Don't set lock->release to true if lock is not acquired at the time of +lock-contention-notification + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23272 +fixes: bz#1731896 +Change-Id: Ie6630db8735ccf372cc54b873a3a3aed7a6082b7 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/180870 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 20 ++++++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 1 + + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index 2e59180..5cae37b 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -1867,6 +1867,10 @@ ec_lock_acquired(ec_lock_link_t *link) + LOCK(&lock->loc.inode->lock); + + lock->acquired = _gf_true; ++ if (lock->contention) { ++ lock->release = _gf_true; ++ lock->contention = _gf_false; ++ } + + ec_lock_update_fd(lock, fop); + ec_lock_wake_shared(lock, &list); +@@ -1892,15 +1896,20 @@ ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + ec_lock_link_t *link = NULL; + ec_lock_t *lock = NULL; + ++ link = fop->data; ++ lock = link->lock; + if (op_ret >= 0) { +- link = fop->data; +- lock = link->lock; + lock->mask = lock->good_mask = fop->good; + lock->healing = 0; + + ec_lock_acquired(link); + ec_lock(fop->parent); + } else { ++ LOCK(&lock->loc.inode->lock); ++ { ++ lock->contention = _gf_false; ++ } ++ UNLOCK(&lock->loc.inode->lock); + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED, + "Failed to complete preop lock"); + } +@@ -2547,6 +2556,13 @@ ec_lock_release(ec_t *ec, inode_t *inode) + gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention", + inode); + ++ if (!lock->acquired) { ++ /* This happens if some bricks already got the lock while inodelk is in ++ * progress. Set release to true after lock is acquired*/ ++ lock->contention = _gf_true; ++ goto done; ++ } ++ + /* The lock is not marked to be released, so the frozen list should be + * empty. 
*/ + GF_ASSERT(list_empty(&lock->frozen)); +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index ea4f6ad..34a9768 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -267,6 +267,7 @@ struct _ec_lock { + uint32_t refs_pending; /* Refs assigned to fops being prepared */ + uint32_t waiting_flags; /*Track xattrop/dirty marking*/ + gf_boolean_t acquired; ++ gf_boolean_t contention; + gf_boolean_t unlock_now; + gf_boolean_t release; + gf_boolean_t query; +-- +1.8.3.1 + diff --git a/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch b/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch new file mode 100644 index 0000000..07fc8f4 --- /dev/null +++ b/SOURCES/0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch @@ -0,0 +1,72 @@ +From 769263ad422e3c1069de0994ff2274044982b242 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Sun, 1 Sep 2019 12:01:09 +0530 +Subject: [PATCH 292/297] rpc: Update address family if it is not provide in + cmd-line arguments + +Problem: After enabling transport-type to inet6 and passed ipv6 + transport.socket.bind-address in glusterd.vol clients are + not started. + +Solution: Need to update address-family based on remote-address for + all gluster client process + +> Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31 +> Fixes: bz#1747746 +> Credits: Amgad Saleh <amgad.saleh@nokia.com> +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry picked from commit 80b8cfe3f1386606bada97a76a0cad7acdf6b877) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23340/) + +Change-Id: Iaa3588cd87cebc45231bfd675745c1a457dc9b31 +BUG: 1750241 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/181184 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/name.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c +index 7f18cc4..b473f3b 100644 +--- a/rpc/rpc-transport/socket/src/name.c ++++ b/rpc/rpc-transport/socket/src/name.c +@@ -214,6 +214,7 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this, + uint16_t remote_port = 0; + struct addrinfo *addr_info = NULL; + int32_t ret = 0; ++ struct in6_addr serveraddr; + + remote_host_data = dict_get(options, "remote-host"); + if (remote_host_data == NULL) { +@@ -249,6 +250,13 @@ af_inet_client_get_remote_sockaddr(rpc_transport_t *this, + goto err; + } + ++ /* Need to update transport-address family if address-family is not provide ++ to command-line arguments ++ */ ++ if (inet_pton(AF_INET6, remote_host, &serveraddr)) { ++ sockaddr->sa_family = AF_INET6; ++ } ++ + /* TODO: gf_resolve is a blocking call. 
kick in some + non blocking dns techniques */ + ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family, +@@ -522,7 +530,10 @@ socket_client_get_remote_sockaddr(rpc_transport_t *this, + ret = -1; + } + +- if (*sa_family == AF_UNSPEC) { ++ /* Address-family is updated based on remote_host in ++ af_inet_client_get_remote_sockaddr ++ */ ++ if (*sa_family != sockaddr->sa_family) { + *sa_family = sockaddr->sa_family; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch b/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch new file mode 100644 index 0000000..23120cb --- /dev/null +++ b/SOURCES/0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch @@ -0,0 +1,69 @@ +From 8f89aef9691b0806d7487525c6a54a1a615c8bc1 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Mon, 2 Sep 2019 10:46:10 +0530 +Subject: [PATCH 293/297] glusterd: IPV6 hostname address is not parsed + correctly + +Problem: IPV6 hostname address is not parsed correctly in function + glusterd_check_brick_order + +Solution: Update the code to parse hostname address + +> Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab +> Fixes: bz#1747746 +> Credits: Amgad Saleh <amgad.saleh@nokia.com> +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry picked from commit 6563ffb04d7ba51a89726e7c5bbb85c7dbc685b5) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23341/) + +Change-Id: Ifb2f83f9c6e987b2292070e048e97eeb51b728ab +BUG: 1750241 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/181185 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 1ea8ba6..076bc80 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -95,6 +95,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str) + int32_t type = GF_CLUSTER_TYPE_NONE; + int32_t sub_count = 0; + struct addrinfo *ai_info = NULL; ++ char brick_addr[128] = { ++ 0, ++ }; ++ int addrlen = 0; + + const char failed_string[2048] = + "Failed to perform brick order " +@@ -182,15 +186,17 @@ glusterd_check_brick_order(dict_t *dict, char *err_str) + brick_list_dup = tmpptr; + if (brick == NULL) + goto check_failed; +- brick = strtok_r(brick, ":", &tmpptr); +- if (brick == NULL) ++ tmpptr = strrchr(brick, ':'); ++ if (tmpptr == NULL) + goto check_failed; +- ret = getaddrinfo(brick, NULL, NULL, &ai_info); ++ addrlen = strlen(brick) - strlen(tmpptr); ++ strncpy(brick_addr, brick, addrlen); ++ brick_addr[addrlen] = '\0'; ++ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); + if (ret != 0) { + ret = 0; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, +- "unable to resolve " +- "host name"); ++ "unable to resolve host name for addr %s", brick_addr); + goto out; + } + ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); +-- +1.8.3.1 + diff --git a/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch b/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch new file mode 100644 index 0000000..1665185 --- /dev/null +++ b/SOURCES/0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch 
@@ -0,0 +1,59 @@ +From 2fa5476b95d4547bdde50f2281bf58b7db24e37a Mon Sep 17 00:00:00 2001 +From: Aravinda VK <avishwan@redhat.com> +Date: Mon, 16 Sep 2019 10:04:26 +0530 +Subject: [PATCH 294/297] eventsapi: Set IPv4/IPv6 family based on input IP + +server.sin_family was set to AF_INET while creating socket connection, +this was failing if the input address is IPv6(`::1`). + +With this patch, sin_family is set by reading the ai_family of +`getaddrinfo` result. + +> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23423/ + +>Fixes: bz#1752330 +>Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb +>Signed-off-by: Aravinda VK <avishwan@redhat.com> + +BUG: 1732443 +Change-Id: I499f957b432842fa989c698f6e5b25b7016084eb +Signed-off-by: Aravinda VK <avishwan@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/181197 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/events.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 2509767..9d33783 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -42,6 +42,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo hints; + struct addrinfo *result = NULL; + xlator_t *this = THIS; ++ int sin_family = AF_INET; + + /* Global context */ + ctx = THIS->ctx; +@@ -75,13 +76,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + ret = EVENT_ERROR_RESOLVE; + goto out; + } ++ ++ sin_family = result->ai_family; + } else { + /* Localhost, Use the defined IP for localhost */ + host = gf_strdup(EVENT_HOST); + } + + /* Socket Configurations */ +- server.sin_family = AF_INET; ++ server.sin_family = sin_family; + server.sin_port = htons(EVENT_PORT); + ret = inet_pton(server.sin_family, host, &server.sin_addr); + if (ret <= 0) { +-- +1.8.3.1 + diff --git a/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch b/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch new file mode 100644 index 0000000..9d3820d --- /dev/null +++ b/SOURCES/0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch @@ -0,0 +1,1164 @@ +From d5ce2300f77c25b38a076d4dd6a5521e82c56172 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 29 Jul 2019 18:30:42 +0530 +Subject: [PATCH 295/297] ctime/rebalance: Heal ctime xattr on directory during + rebalance + +After add-brick and rebalance, the ctime xattr is not present +on rebalanced directories on new brick. This patch fixes the +same. + +Note that ctime still doesn't support consistent time across +distribute sub-volume. + +This patch also fixes the in-memory inconsistency of time attributes +when metadata is self healed. 
+ +Backport of: + > Patch: https://review.gluster.org/23127/ + > Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df + > fixes: bz#1734026 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df +BUG: 1728673 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/181105 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Atin Mukherjee <amukherj@redhat.com> +--- + tests/basic/afr/split-brain-healing-ctime.t | 253 +++++++++++++++++++++ + tests/basic/afr/split-brain-healing.t | 1 + + tests/basic/ctime/ctime-ec-heal.t | 71 ++++++ + tests/basic/ctime/ctime-ec-rebalance.t | 44 ++++ + tests/basic/ctime/ctime-rep-heal.t | 71 ++++++ + tests/basic/ctime/ctime-rep-rebalance.t | 42 ++++ + .../bug-1734370-entry-heal-restore-time.t | 84 +++++++ + tests/volume.rc | 15 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 3 +- + xlators/cluster/afr/src/afr-self-heal-entry.c | 2 + + xlators/cluster/dht/src/dht-common.c | 1 + + xlators/cluster/ec/src/ec-heal.c | 7 +- + xlators/storage/posix/src/posix-entry-ops.c | 8 +- + xlators/storage/posix/src/posix-helpers.c | 31 ++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 57 ++--- + xlators/storage/posix/src/posix-metadata.c | 65 +++++- + xlators/storage/posix/src/posix-metadata.h | 7 + + xlators/storage/posix/src/posix.h | 5 +- + 18 files changed, 714 insertions(+), 53 deletions(-) + create mode 100644 tests/basic/afr/split-brain-healing-ctime.t + create mode 100644 tests/basic/ctime/ctime-ec-heal.t + create mode 100644 tests/basic/ctime/ctime-ec-rebalance.t + create mode 100644 tests/basic/ctime/ctime-rep-heal.t + create mode 100644 tests/basic/ctime/ctime-rep-rebalance.t + create mode 100644 tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t + +diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t +new file mode 100644 +index 0000000..1ca18e3 +--- /dev/null ++++ b/tests/basic/afr/split-brain-healing-ctime.t +@@ -0,0 +1,253 @@ ++#!/bin/bash ++ ++#Test the split-brain resolution CLI commands. ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++function get_replicate_subvol_number { ++ local filename=$1 ++ #get_backend_paths ++ if [ -f $B0/${V0}1/$filename ] ++ then ++ echo 0 ++ elif [ -f $B0/${V0}3/$filename ] ++ then echo 1 ++ else ++ echo -1 ++ fi ++} ++ ++cleanup; ++ ++AREQUAL_PATH=$(dirname $0)/../../utils ++GET_MDATA_PATH=$(dirname $0)/../../utils ++CFLAGS="" ++test "`uname -s`" != "Linux" && { ++ CFLAGS="$CFLAGS -lintl"; ++} ++build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS ++build_tester $GET_MDATA_PATH/get-mdata-xattr.c ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} ++TEST $CLI volume set $V0 cluster.self-heal-daemon off ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++cd $M0 ++for i in {1..10} ++do ++ echo "Initial content">>file$i ++done ++ ++replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`) ++replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`) ++ ++############ Create data split-brain in the files. 
########################### ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++for file in ${!replica_0_files_list[*]} ++do ++ echo "B1 is down">>${replica_0_files_list[$file]} ++done ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++for file in ${!replica_1_files_list[*]} ++do ++ echo "B3 is down">>${replica_1_files_list[$file]} ++done ++ ++SMALLER_FILE_SIZE=$(stat -c %s file1) ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++for file in ${!replica_0_files_list[*]} ++do ++ echo "B2 is down">>${replica_0_files_list[$file]} ++ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]} ++done ++TEST kill_brick $V0 $H0 $B0/${V0}4 ++for file in ${!replica_1_files_list[*]} ++do ++ echo "B4 is down">>${replica_1_files_list[$file]} ++ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]} ++done ++ ++BIGGER_FILE_SIZE=$(stat -c %s file1) ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 ++ ++ ++############### Acessing the files should now give EIO. ############################### ++TEST ! cat file1 ++TEST ! cat file2 ++TEST ! cat file3 ++TEST ! cat file4 ++TEST ! cat file5 ++TEST ! cat file6 ++TEST ! cat file7 ++TEST ! cat file8 ++TEST ! cat file9 ++TEST ! cat file10 ++################### ++TEST $CLI volume set $V0 cluster.self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 ++ ++################ Heal file1 using the bigger-file option ############## ++$CLI volume heal $V0 split-brain bigger-file /file1 ++EXPECT "0" echo $? ++EXPECT $BIGGER_FILE_SIZE stat -c %s file1 ++ ++################ Heal file2 using the bigger-file option and its gfid ############## ++subvolume=$(get_replicate_subvol_number file2) ++if [ $subvolume == 0 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2) ++elif [ $subvolume == 1 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2) ++fi ++GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++$CLI volume heal $V0 split-brain bigger-file $GFIDSTR ++EXPECT "0" echo $? ++ ++################ Heal file3 using the source-brick option ############## ++################ Use the brick having smaller file size as source ####### ++subvolume=$(get_replicate_subvol_number file3) ++if [ $subvolume == 0 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3 ++elif [ $subvolume == 1 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 ++fi ++EXPECT "0" echo $? 
++EXPECT $SMALLER_FILE_SIZE stat -c %s file3 ++ ++################ Heal file4 using the source-brick option and it's gfid ############## ++################ Use the brick having smaller file size as source ####### ++subvolume=$(get_replicate_subvol_number file4) ++if [ $subvolume == 0 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR ++elif [ $subvolume == 1 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR ++fi ++EXPECT "0" echo $? ++EXPECT $SMALLER_FILE_SIZE stat -c %s file4 ++ ++# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed ++# as part of metadata heal. So mtime would be same, hence it can't be healed ++# using 'latest-mtime' policy, use 'source-brick' option instead. ++################ Heal file5 using the source-brick option ############## ++subvolume=$(get_replicate_subvol_number file5) ++if [ $subvolume == 0 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5 ++elif [ $subvolume == 1 ] ++then ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5 ++fi ++EXPECT "0" echo $? ++ ++if [ $subvolume == 0 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) ++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5) ++elif [ $subvolume == 1 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) ++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5) ++fi ++ ++#TODO: To below comparisons on full sub-second resolution ++ ++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ] ++ ++mtime_mount_after_heal=$(stat -c %Y file5) ++ ++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ] ++ ++################ Heal file6 using the source-brick option and its gfid ############## ++subvolume=$(get_replicate_subvol_number file6) ++if [ $subvolume == 0 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR ++elif [ $subvolume == 1 ] ++then ++ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) ++ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" ++ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR ++fi ++EXPECT "0" echo $? ++ ++if [ $subvolume == 0 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) ++ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6) ++elif [ $subvolume == 1 ] ++then ++ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) ++ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6) ++fi ++ ++#TODO: To below comparisons on full sub-second resolution ++ ++TEST [ $mtime1_after_heal -eq $mtime2_after_heal ] ++ ++mtime_mount_after_heal=$(stat -c %Y file6) ++ ++TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ] ++ ++################ Heal remaining SB'ed files of replica_0 using B1 as source ############## ++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 ++EXPECT "0" echo $? ++ ++################ Heal remaining SB'ed files of replica_1 using B3 as source ############## ++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 ++EXPECT "0" echo $? ++ ++############### Reading the files should now succeed. 
############################### ++TEST cat file1 ++TEST cat file2 ++TEST cat file3 ++TEST cat file4 ++TEST cat file5 ++TEST cat file6 ++TEST cat file7 ++TEST cat file8 ++TEST cat file9 ++TEST cat file10 ++ ++################ File contents on the bricks must be same. ################################ ++TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs) ++TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs) ++ ++############### Trying to heal files not in SB should fail. ############################### ++$CLI volume heal $V0 split-brain bigger-file /file1 ++EXPECT "1" echo $? ++$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 ++EXPECT "1" echo $? ++ ++cd - ++TEST rm $AREQUAL_PATH/arequal-checksum ++TEST rm $GET_MDATA_PATH/get-mdata-xattr ++cleanup +diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t +index 78553e6..315e815 100644 +--- a/tests/basic/afr/split-brain-healing.t ++++ b/tests/basic/afr/split-brain-healing.t +@@ -35,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off + TEST $CLI volume set $V0 cluster.data-self-heal off + TEST $CLI volume set $V0 cluster.metadata-self-heal off + TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 ctime off + TEST $CLI volume start $V0 + TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t +new file mode 100644 +index 0000000..1cb4516 +--- /dev/null ++++ b/tests/basic/ctime/ctime-ec-heal.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++# ++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata' ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++mkdir $M0/dir1 ++echo "Initial content" > $M0/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++# Kill brick ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++ ++echo "B3 is down" >> $M0/file1 ++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1 ++echo "Entry heal file" > $M0/entry_heal_file1 ++mkdir $M0/entry_heal_dir1 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++TEST $CLI volume start $V0 force ++$CLI volume heal $V0 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++cleanup; +diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t +new file mode 100644 +index 0000000..caccdc1 +--- /dev/null ++++ b/tests/basic/ctime/ctime-ec-rebalance.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++# ++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../fallocate.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 ++ ++# Create files ++mkdir $M0/dir1 ++echo "test data" > $M0/dir1/file1 ++ ++# Add brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8} ++ ++#Trigger rebalance ++TEST $CLI volume rebalance $V0 start force ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 ++ ++#Verify ctime xattr heal on directory ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1" ++ ++b6_mdata=$(get_mdata "$B0/${V0}6/dir1") ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1 ++ ++cleanup; +diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t +new file mode 100644 +index 0000000..ba8b08a +--- /dev/null ++++ b/tests/basic/ctime/ctime-rep-heal.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++# ++# This will test self healing of ctime xattr 'trusted.glusterfs.mdata' ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++mkdir $M0/dir1 ++echo "Initial content" > $M0/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++# Kill brick ++TEST kill_brick $V0 $H0 $B0/${V0}3 ++ ++echo "B3 is down" >> $M0/file1 ++echo "Change dir1 time attributes" > $M0/dir1/dir1_file1 ++echo "Entry heal file" > $M0/entry_heal_file1 ++mkdir $M0/entry_heal_dir1 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++TEST $CLI volume start $V0 force ++$CLI 
volume heal $V0 ++ ++# Check xattr ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1 ++ ++cleanup; +diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t +new file mode 100644 +index 0000000..dd9743e +--- /dev/null ++++ b/tests/basic/ctime/ctime-rep-rebalance.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++# ++# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance ++# ++### ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++#cleate and start volume ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5} ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0 ++ ++#Mount the volume ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++mkdir $M0/dir1 ++ ++# Add brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8} ++ ++#Trigger rebalance ++TEST $CLI volume rebalance $V0 start force ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 ++ ++#Verify ctime xattr heal on directory ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1" ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1" ++ ++b6_mdata=$(get_mdata "$B0/${V0}6/dir1") ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1 ++ ++cleanup; +diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t +new file mode 100644 +index 0000000..298d6ed +--- /dev/null ++++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t +@@ -0,0 +1,84 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++function time_stamps_match { ++ path=$1 ++ mtime_source_b0=$(get_mtime $B0/${V0}0/$path) ++ atime_source_b0=$(get_atime $B0/${V0}0/$path) ++ mtime_source_b2=$(get_mtime $B0/${V0}2/$path) ++ atime_source_b2=$(get_atime $B0/${V0}2/$path) ++ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path) ++ atime_sink_b1=$(get_atime $B0/${V0}1/$path) ++ ++ #The same brick must be the source of heal for both atime and mtime. 
++ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \ ++ ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]] ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++ ++} ++ ++# Test that the parent dir's timestamps are restored during entry-heal. ++GET_MDATA_PATH=$(dirname $0)/../../utils ++build_tester $GET_MDATA_PATH/get-mdata-xattr.c ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 ctime on ++TEST $CLI volume start $V0; ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++############################################################################### ++TEST mkdir $M0/DIR ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST touch $M0/DIR/FILE ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++EXPECT "Y" time_stamps_match DIR ++ctime_source1=$(get_ctime $B0/${V0}0/$path) ++ctime_source2=$(get_ctime $B0/${V0}2/$path) ++ctime_sink=$(get_ctime $B0/${V0}1/$path) ++TEST [ $ctime_source1 -eq $ctime_sink ] ++TEST [ $ctime_source2 -eq $ctime_sink ] ++ ++############################################################################### ++# Repeat the test with ctime feature disabled. ++TEST $CLI volume set $V0 features.ctime off ++TEST mkdir $M0/DIR2 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST touch $M0/DIR2/FILE ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++EXPECT "Y" time_stamps_match DIR2 ++ ++TEST rm $GET_MDATA_PATH/get-mdata-xattr ++cleanup; +diff --git a/tests/volume.rc b/tests/volume.rc +index 76a8fd4..9a002d9 100644 +--- a/tests/volume.rc ++++ b/tests/volume.rc +@@ -371,6 +371,19 @@ function get_gfid2path { + getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null + } + ++function get_mdata { ++ local path=$1 ++ getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'=' ++} ++ ++function get_mdata_count { ++ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l ++} ++ ++function get_mdata_uniq_count { ++ getfattr -d -m . 
-e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
++}
++
+ function get_xattr_key {
+     local key=$1
+     local path=$2
+@@ -925,7 +938,7 @@ function get_ctime {
+     local time=$(get-mdata-xattr -c $1)
+     if [ $time == "-1" ];
+     then
+-        echo $(stat -c %Z $2)
++        echo $(stat -c %Z $1)
+     else
+         echo $time
+     fi
+diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
+index b38085a..81ef38a 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-common.c
++++ b/xlators/cluster/afr/src/afr-self-heal-common.c
+@@ -513,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+
+     AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
+                &replies[source].poststat,
+-               (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL);
++               (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
++               NULL);
+
+     loc_wipe(&loc);
+
+diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
+index e07b521..35b600f 100644
+--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
+@@ -1032,6 +1032,8 @@ unlock:
+         goto postop_unlock;
+     }
+
++    afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
++                              locked_replies);
+     ret = afr_selfheal_undo_pending(
+         frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+         AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 219b072..99cccd6 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -115,6 +115,7 @@ char *xattrs_to_heal[] = {"user.",
+                           QUOTA_LIMIT_KEY,
+                           QUOTA_LIMIT_OBJECTS_KEY,
+                           GF_SELINUX_XATTR_KEY,
++                          GF_XATTR_MDATA_KEY,
+                           NULL};
+
+ char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 0f0f398..06a7016 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -2301,9 +2301,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+
+     loc.inode = inode_ref(fd->inode);
+     gf_uuid_copy(loc.gfid, fd->inode->gfid);
+-    ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
+-                          output, frame, ec->xl, &loc, &source_buf,
+-                          GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
++    ret = cluster_setattr(
++        ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
++        ec->xl, &loc, &source_buf,
++        GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
+     EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+     if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+         ret = -ENOTCONN;
+diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
+index 34ee2b8..283b305 100644
+--- a/xlators/storage/posix/src/posix-entry-ops.c
++++ b/xlators/storage/posix/src/posix-entry-ops.c
+@@ -500,7 +500,7 @@ post_op:
+         posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+     }
+
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         if (errno != EEXIST)
+             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+@@ -828,7 +828,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+                    "setting ACLs on %s failed ", real_path);
+     }
+
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                "setting xattrs on %s failed", real_path);
+@@ -1529,7 +1529,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+     }
+
+ ignore:
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                "setting xattrs on %s failed ", real_path);
+@@ -2167,7 +2167,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+         posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
+     }
+ ignore:
+-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
++    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
+     if (op_ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+                "setting xattrs on %s failed ", real_path);
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index d143d4c..6a1a35c 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -1188,11 +1188,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key,
+ #endif
+
+ int
+-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+                   data_t *value, int flags, struct iatt *stbuf)
+ {
+     int sys_ret = -1;
+     int ret = 0;
++    int op_errno = 0;
++    struct mdata_iatt mdata_iatt = {
++        0,
++    };
+ #ifdef GF_DARWIN_HOST_OS
+     const int error_code = EINVAL;
+ #else
+@@ -1216,6 +1220,23 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key,
+         /* ignore this key value pair */
+         ret = 0;
+         goto out;
++    } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) {
++        /* This is either by rebalance or self heal. Create the xattr if it's
++         * not present. Compare and update the larger value if the xattr is
++         * already present.
++         */
++        if (loc == NULL) {
++            ret = -EINVAL;
++            goto out;
++        }
++        posix_mdata_iatt_from_disk(&mdata_iatt,
++                                   (posix_mdata_disk_t *)value->data);
++        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
++                                                 &mdata_iatt, &op_errno);
++        if (ret != 0) {
++            ret = -op_errno;
++        }
++        goto out;
+     } else {
+         sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags);
+ #ifdef GF_DARWIN_HOST_OS
+@@ -1810,8 +1831,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+         return 0;
+     }
+
+-    ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE,
+-                            filler->stbuf);
++    ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
++                            XATTR_CREATE, filler->stbuf);
+     if (ret < 0) {
+         errno = -ret;
+         return -1;
+@@ -1820,7 +1841,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+ }
+
+ int
+-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
++                             dict_t *dict)
+ {
+     int ret = -1;
+
+@@ -1834,6 +1856,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
+     filler.this = this;
+     filler.real_path = path;
+     filler.stbuf = NULL;
++    filler.loc = loc;
+
+     ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler);
+
+diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
+index e0ea85b..a2a518f 100644
+--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
+@@ -429,22 +429,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                                     &frame->root->ctime, stbuf, valid);
+     }
+
+-    if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
+-        /*
+-         * If ctime is not enabled, we have no means to associate an
+-         * arbitrary ctime with the file, so as a fallback, we ignore
+-         * the ctime payload and update the file ctime to current time
+-         * (which is possible directly with the POSIX API).
+-         */
+-        op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL);
+-        if (op_ret == -1) {
+-            op_errno = errno;
+-            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
+-                   "setattr (utimes) on %s "
+-                   "failed",
+-                   real_path);
+-            goto out;
+-        }
++    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
++        posix_update_ctime_in_mdata(this, real_path, -1, loc->inode,
++                                    &frame->root->ctime, stbuf, valid);
+     }
+
+     if (!valid) {
+@@ -469,14 +456,6 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+         goto out;
+     }
+
+-    if (valid & GF_SET_ATTR_CTIME && priv->ctime) {
+-        /*
+-         * If we got ctime payload, we override
+-         * the ctime of statpost with that.
+-         */
+-        statpost.ia_ctime = stbuf->ia_ctime;
+-        statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+-    }
+     posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);
+
+     if (xdata)
+@@ -592,6 +571,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+     struct iatt statpost = {
+         0,
+     };
++    struct posix_private *priv = NULL;
+     struct posix_fd *pfd = NULL;
+     dict_t *xattr_rsp = NULL;
+     int32_t ret = -1;
+@@ -604,6 +584,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+     VALIDATE_OR_GOTO(this, out);
+     VALIDATE_OR_GOTO(fd, out);
+
++    priv = this->private;
++    VALIDATE_OR_GOTO(priv, out);
++
+     ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+     if (ret < 0) {
+         gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+@@ -656,6 +639,11 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                                     &frame->root->ctime, stbuf, valid);
+     }
+
++    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
++        posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode,
++                                    &frame->root->ctime, stbuf, valid);
++    }
++
+     if (!valid) {
+         op_ret = sys_fchown(pfd->fd, -1, -1);
+         if (op_ret == -1) {
+@@ -2578,7 +2566,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+
+     filler = tmp;
+
+-    return posix_handle_pair(filler->this, filler->real_path, k, v,
++    return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
+                              filler->flags, filler->stbuf);
+ }
+
+@@ -2641,27 +2629,27 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+     priv = this->private;
+     DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
++    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
++    if (!real_path) {
++        op_ret = -1;
++        op_errno = ESTALE;
++        goto out;
++    }
++
+     ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
+     if (ret == 0) {
+         /* This is initiated by lookup when ctime feature is enabled to create
+          * "trusted.glusterfs.mdata" xattr if not present. These are the files
+          * which were created when ctime feature is disabled.
+          */
+-        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt,
+-                                                 &op_errno);
++        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
++                                                 &mdata_iatt, &op_errno);
+         if (ret != 0) {
+             op_ret = -1;
+         }
+         goto out;
+     }
+
+-    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+-    if (!real_path) {
+-        op_ret = -1;
+-        op_errno = ESTALE;
+-        goto out;
+-    }
+-
+     posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);
+
+     op_ret = -1;
+@@ -2796,6 +2784,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+     filler.real_path = real_path;
+     filler.this = this;
+     filler.stbuf = &preop;
++    filler.loc = loc;
+
+ #ifdef GF_DARWIN_HOST_OS
+     filler.flags = map_xattr_flags(flags);
+diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
+index 532daa2..9efaf99 100644
+--- a/xlators/storage/posix/src/posix-metadata.c
++++ b/xlators/storage/posix/src/posix-metadata.c
+@@ -56,6 +56,19 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+     out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+ }
+
++void
++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in)
++{
++    out->ia_ctime = be64toh(in->ctime.tv_sec);
++    out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec);
++
++    out->ia_mtime = be64toh(in->mtime.tv_sec);
++    out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec);
++
++    out->ia_atime = be64toh(in->atime.tv_sec);
++    out->ia_atime_nsec = be64toh(in->atime.tv_nsec);
++}
++
+ /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+ static int
+ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
+@@ -341,6 +354,7 @@ posix_compare_timespec(struct timespec *first, struct timespec *second)
+
+ int
+ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++                                   const char *realpath,
+                                    struct mdata_iatt *mdata_iatt, int *op_errno)
+ {
+     posix_mdata_t *mdata = NULL;
+@@ -369,8 +383,8 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+             goto unlock;
+         }
+
+-        ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
+-                                      op_errno);
++        ret = posix_fetch_mdata_xattr(this, realpath, -1, inode,
++                                      (void *)mdata, op_errno);
+         if (ret == 0) {
+             /* Got mdata from disk. This is a race, another client
+              * has healed the xattr during lookup. So set it in inode
+@@ -412,7 +426,7 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
+         }
+     }
+
+-    ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
++    ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata);
+     if (ret) {
+         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED,
+                "gfid: %s key:%s ", uuid_utoa(inode->gfid),
+@@ -445,7 +459,8 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
+     GF_VALIDATE_OR_GOTO(this->name, inode, out);
+     GF_VALIDATE_OR_GOTO(this->name, time, out);
+
+-    if (update_utime && (!u_atime || !u_mtime)) {
++    if (update_utime && (flag->ctime && !time) && (flag->atime && !u_atime) &&
++        (flag->mtime && !u_mtime)) {
+         goto out;
+     }
+
+@@ -652,6 +667,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+     return;
+ }
+
++/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs
++ * to be modified
++ */
++void
++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
++                            inode_t *inode, struct timespec *ctime,
++                            struct iatt *stbuf, int valid)
++{
++    int32_t ret = 0;
++#if defined(HAVE_UTIMENSAT)
++    struct timespec tv_ctime = {
++        0,
++    };
++#else
++    struct timeval tv_ctime = {
++        0,
++    };
++#endif
++    posix_mdata_flag_t flag = {
++        0,
++    };
++
++    struct posix_private *priv = NULL;
++    priv = this->private;
++
++    if (inode && priv->ctime) {
++        tv_ctime.tv_sec = stbuf->ia_ctime;
++        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec);
++        flag.ctime = 1;
++
++        ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL,
++                                    NULL, NULL, &flag, _gf_true);
++        if (ret) {
++            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
++                   "posix set mdata atime failed on file:"
++                   " %s gfid:%s",
++                   real_path, uuid_utoa(inode->gfid));
++        }
++    }
++    return;
++}
++
+ static void
+ posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag)
+ {
+diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
+index c176699..63e8771 100644
+--- a/xlators/storage/posix/src/posix-metadata.h
++++ b/xlators/storage/posix/src/posix-metadata.h
+@@ -43,6 +43,10 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
+                             inode_t *inode, struct timespec *ctime,
+                             struct iatt *stbuf, int valid);
+ void
++posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
++                            inode_t *inode, struct timespec *ctime,
++                            struct iatt *stbuf, int valid);
++void
+ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
+                 int fd, inode_t *inode, struct iatt *stbuf);
+ void
+@@ -56,7 +60,10 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                     int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
+ int
+ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
++                                   const char *realpath,
+                                    struct mdata_iatt *mdata_iatt,
+                                    int *op_errno);
++void
++posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in);
+
+ #endif /* _POSIX_METADATA_H */
+diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
+index 64288a7..dd51062 100644
+--- a/xlators/storage/posix/src/posix.h
++++ b/xlators/storage/posix/src/posix.h
+@@ -339,7 +339,7 @@ dict_t *
+ posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd,
+                  int fdnum, dict_t *xattr, struct iatt *buf);
+ int
+-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
++posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
+                   data_t *value, int flags, struct iatt *stbuf);
+ int
+ posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key,
+@@ -352,7 +352,8 @@ int
+ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc,
+                 dict_t *xattr_req);
+ int
+-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict);
++posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
++                             dict_t *dict);
+
+ int
+ posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd,
+--
+1.8.3.1
+
diff --git a/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch b/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch
new file mode 100644
index 0000000..9f43ff8
--- /dev/null
+++ b/SOURCES/0296-glusterfind-pre-command-failure-on-a-modify.patch
@@ -0,0 +1,62 @@
+From bfb64a0e685eb5755ceda6c54690335564e135c9 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Mon, 16 Sep 2019 14:22:34 +0530
+Subject: [PATCH 296/297] glusterfind: pre command failure on a modify
+
+Label: DOWNSTREAM ONLY
+
+On upstream we have gfid_to_all_paths_using_gfid2path instead of
+gfid_to_path_using_pgfid and so we do not hit this in upstream.
+
+Problem: On a modify, the pre command runs through the find function,
+where the number of arguments sent mismatches and causes a stderr.
+The mismatch is because both changelog and brickfind use the find(),
+but only brickfind was handled.
+
+Fix: Handle the additional argument on the changelog side as well,
+receiving it as a dummy variable for changelog.
+
+Change-Id: I5eecdd993e477b68a0e486db2ad7e56ba94bbf02
+fixes: bz#1733970
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181095
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
+Reviewed-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ tools/glusterfind/src/changelog.py | 5 +++--
+ tools/glusterfind/src/utils.py     | 2 +-
+ 2 files changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
+index 40c381b..ef982db 100644
+--- a/tools/glusterfind/src/changelog.py
++++ b/tools/glusterfind/src/changelog.py
+@@ -141,8 +141,9 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
+
+     # Length of brick path, to remove from output path
+     brick_path_len = len(brick)
+-
+-    def output_callback(path, inode):
++    # is_dir is a dummy variable to make it compatible with the find
++    # used in brickfind
++    def output_callback(path, inode, is_dir):
+         # For each path found, encodes it and updates path1
+         # Also updates converted flag in inodegfid table as 1
+         path = path.strip()
+diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
+index cc09903..e226c5a 100644
+--- a/tools/glusterfind/src/utils.py
++++ b/tools/glusterfind/src/utils.py
+@@ -70,7 +70,7 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
+             else:
+                 filter_result = filter_func(full_path)
+                 if filter_result is not None:
+-                    callback_func(full_path, filter_result)
++                    callback_func(full_path, filter_result, None)
+         else:
+             filter_result = filter_func(full_path)
+             if filter_result is not None:
+--
+1.8.3.1
+
diff --git a/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch b/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
new file mode 100644
index 0000000..47b5da0
--- /dev/null
+++ b/SOURCES/0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch
@@ -0,0 +1,89 @@
+From 37555b6c83d3a979033111a754ee1728dab254f5 Mon Sep 17 00:00:00 2001
+From: Hari Gowtham <hgowtham@redhat.com>
+Date: Wed, 18 Sep 2019 17:38:52 +0530
+Subject: [PATCH 297/297] rpmbuild: fixing the build errors with 2a905a8ae
+
+Label: DOWNSTREAM ONLY
+
+Added a Makefile inside extras/quota to remove the
+"No rule to make target" error for quota/log_accounting.sh.
+
+Change-Id: Ia3f6b3fa21a0de7eb3bdb31b3d205139df412aca
+fixes: bz#1719171
+Signed-off-by: Hari Gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181326
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ configure.ac             | 1 +
+ extras/Makefile.am       | 6 +-----
+ extras/quota/Makefile.am | 8 ++++++++
+ 3 files changed, 10 insertions(+), 5 deletions(-)
+ create mode 100644 extras/quota/Makefile.am
+
+diff --git a/configure.ac b/configure.ac
+index f597b86..327733e 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -232,6 +232,7 @@ AC_CONFIG_FILES([Makefile
+                 extras/hook-scripts/reset/pre/Makefile
+                 extras/python/Makefile
+                 extras/snap_scheduler/Makefile
++                extras/quota/Makefile
+                 events/Makefile
+                 events/src/Makefile
+                 events/src/eventsapiconf.py
+diff --git a/extras/Makefile.am b/extras/Makefile.am
+index 8cbfda1..31ccdf5 100644
+--- a/extras/Makefile.am
++++ b/extras/Makefile.am
+@@ -12,7 +12,7 @@ EditorMode_DATA = glusterfs-mode.el glusterfs.vim
+
+ SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
+           $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
+-          ganesha
++          ganesha quota
+
+ confdir = $(sysconfdir)/glusterfs
+ if WITH_SERVER
+@@ -30,14 +30,11 @@ endif
+
+ scriptsdir = $(datadir)/glusterfs/scripts
+ scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh
+-scripts_SCRIPTS += quota/log_accounting.sh
+ scripts_SCRIPTS += collect-system-stats.sh
+ scripts_SCRIPTS += identify-hangs.sh
+ if WITH_SERVER
+ scripts_SCRIPTS += post-upgrade-script-for-quota.sh \
+                    pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
+-scripts_SCRIPTS += quota/quota_fsck.py
+-scripts_SCRIPTS += quota/xattr_analysis.py
+ if USE_SYSTEMD
+ scripts_SCRIPTS += control-cpu-load.sh
+ scripts_SCRIPTS += control-mem.sh
+@@ -56,7 +53,6 @@ EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.co
+        stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \
+        control-cpu-load.sh control-mem.sh group-distributed-virt \
+        thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh \
+-       quota/xattr_analysis.py quota/quota_fsck.py quota/log_accounting.sh \
+        collect-system-stats.sh identify-hangs.sh
+
+ if WITH_SERVER
+diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am
+new file mode 100644
+index 0000000..cdb6be1
+--- /dev/null
++++ b/extras/quota/Makefile.am
+@@ -0,0 +1,8 @@
++scriptsdir = $(datadir)/glusterfs/scripts
++scripts_SCRIPTS = log_accounting.sh
++
++if WITH_SERVER
++scripts_SCRIPTS += xattr_analysis.py quota_fsck.py
++endif
++
++EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py
+--
+1.8.3.1
+
diff --git a/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch b/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch
new file mode 100644
index 0000000..72daa15
--- /dev/null
+++ b/SOURCES/0298-geo-rep-fix-sub-command-during-worker-connection.patch
@@ -0,0 +1,56 @@
+From f65f4739914cf317da7e5eaa3b5a06fe64f338c2 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Sat, 21 Sep 2019 01:07:30 +0530
+Subject: [PATCH 298/302] geo-rep : fix sub-command during worker connection
+
+Problem:
+
+Geo-rep session for a non-root user is going faulty.
+
+Solution:
+
+During worker start we do not construct the slave url and use
+'args.resource_remote', which is basically just the slave hostname.
+This works for a root session but fails in a non-root session during
+the ssh command.
+Using the slave url solves this issue.
+
+Backport of:
+    >fixes: bz#1753928
+    >Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+    >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Upstream patch:
+    https://review.gluster.org/#/c/glusterfs/+/23465/
+
+BUG: 1754407
+Change-Id: Ib83552fde77f81c208896494b323514ab37ebf22
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181895
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ geo-replication/syncdaemon/subcmds.py | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
+index 8de7db2..f8515f2 100644
+--- a/geo-replication/syncdaemon/subcmds.py
++++ b/geo-replication/syncdaemon/subcmds.py
+@@ -73,8 +73,11 @@ def subcmd_worker(args):
+     Popen.init_errhandler()
+     fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+     local = GLUSTER("localhost", args.master)
+-    slavevol = args.slave.split("::")[-1]
+-    slavehost = args.resource_remote
++    slave_url, slavevol = args.slave.split("::")
++    if "@" not in slave_url:
++        slavehost = args.resource_remote
++    else:
++        slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+     remote = SSH(slavehost, slavevol)
+     remote.connect_remote()
+     local.connect()
+--
+1.8.3.1
+
diff --git a/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch b/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch
new file mode 100644
index 0000000..9dea8cc
--- /dev/null
+++ b/SOURCES/0299-geo-rep-performance-improvement-while-syncing-rename.patch
@@ -0,0 +1,156 @@
+From f293f7ac2f75c58d81da1229b484eb530b7083b5 Mon Sep 17 00:00:00 2001
+From: Sunny Kumar <sunkumar@redhat.com>
+Date: Fri, 20 Sep 2019 09:39:12 +0530
+Subject: [PATCH 299/302] geo-rep: performance improvement while syncing
+ renames with existing gfid
+
+Problem:
+The bug[1] addresses an issue of data inconsistency when handling RENAME
+with an existing destination. This fix requires some performance tuning
+considering this issue occurs in heavy rename workloads.
+
+Solution:
+If the distribution count for the master volume is one, do not verify
+ops on the master and go ahead with the rename.
+
+The performance improvement with this patch can only be observed if
+the master volume has a distribution count of one.
+
+[1]. https://bugzilla.redhat.com/show_bug.cgi?id=1694820
+Backport of:
+
+    >fixes: bz#1753857
+    >Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+    >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+Upstream Patch:
+    https://review.gluster.org/#/c/glusterfs/+/23459/
+
+BUG: 1726000
+Change-Id: I8e9bcd575e7e35f40f9f78b7961c92dee642f47b
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/181893
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ geo-replication/gsyncd.conf.in           |  5 +++++
+ geo-replication/syncdaemon/gsyncd.py     |  2 ++
+ geo-replication/syncdaemon/monitor.py    |  2 ++
+ geo-replication/syncdaemon/resource.py   | 13 +++++++++++--
+ geo-replication/syncdaemon/syncdutils.py | 11 +++++++++++
+ 5 files changed, 31 insertions(+), 2 deletions(-)
+
+diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
+index 5ebd57a..9155cd8 100644
+--- a/geo-replication/gsyncd.conf.in
++++ b/geo-replication/gsyncd.conf.in
+@@ -23,6 +23,11 @@ configurable=false
+ type=int
+ value=1
+
++[master-distribution-count]
++configurable=false
++type=int
++value=1
++
+ [glusterd-workdir]
+ value = @GLUSTERD_WORKDIR@
+
+diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
+index a4c6f32..6ae5269 100644
+--- a/geo-replication/syncdaemon/gsyncd.py
++++ b/geo-replication/syncdaemon/gsyncd.py
+@@ -134,6 +134,8 @@ def main():
+                    help="Directory where Gluster binaries exist on slave")
+     p.add_argument("--slave-access-mount", action="store_true",
+                    help="Do not lazy umount the slave volume")
++    p.add_argument("--master-dist-count", type=int,
++                   help="Master Distribution count")
+
+     # Status
+     p = sp.add_parser("status")
+diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
+index 234f3f1..236afe7 100644
+--- a/geo-replication/syncdaemon/monitor.py
++++ b/geo-replication/syncdaemon/monitor.py
+@@ -37,6 +37,8 @@ def get_subvol_num(brick_idx, vol, hot):
+     tier = vol.is_tier()
+     disperse_count = vol.disperse_count(tier, hot)
+     replica_count = vol.replica_count(tier, hot)
++    distribute_count = vol.distribution_count(tier, hot)
++    gconf.setconfig("master-distribution-count", distribute_count)
+
+     if (tier and not hot):
+         brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
+index b16db60..189d8a1 100644
+--- a/geo-replication/syncdaemon/resource.py
++++ b/geo-replication/syncdaemon/resource.py
+@@ -377,6 +377,7 @@ class Server(object):
+     def entry_ops(cls, entries):
+         pfx = gauxpfx()
+         logging.debug('entries: %s' % repr(entries))
++        dist_count = rconf.args.master_dist_count
+
+         def entry_purge(op, entry, gfid, e, uid, gid):
+             # This is an extremely racy code and needs to be fixed ASAP.
+@@ -686,9 +687,15 @@ class Server(object):
+                             raise
+                     else:
+                         raise
+-                elif not matching_disk_gfid(gfid, en):
++                elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+                     collect_failure(e, EEXIST, uid, gid, True)
+                 else:
++                    # We are here which means matching_disk_gfid for
++                    # both source and destination has returned false
++                    # and distribution count for master vol is greater
++                    # than one, which basically says both the source and
++                    # destination exist and are not hardlinks.
++                    # So we are safe to go ahead with rename here.
+                     rename_with_disk_gfid_confirmation(gfid, entry, en,
+                                                        uid, gid)
+                 if blob:
+@@ -1409,7 +1416,9 @@ class SSH(object):
+             '--slave-gluster-log-level',
+             gconf.get("slave-gluster-log-level"),
+             '--slave-gluster-command-dir',
+-            gconf.get("slave-gluster-command-dir")]
++            gconf.get("slave-gluster-command-dir"),
++            '--master-dist-count',
++            str(gconf.get("master-distribution-count"))]
+
+         if gconf.get("slave-access-mount"):
+             args_to_slave.append('--slave-access-mount')
+diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
+index 2ee10ac..aadaebd 100644
+--- a/geo-replication/syncdaemon/syncdutils.py
++++ b/geo-replication/syncdaemon/syncdutils.py
+@@ -926,6 +926,14 @@ class Volinfo(object):
+         else:
+             return int(self.get('disperseCount')[0].text)
+
++    def distribution_count(self, tier, hot):
++        if (tier and hot):
++            return int(self.get('hotBricks/hotdistCount')[0].text)
++        elif (tier and not hot):
++            return int(self.get('coldBricks/colddistCount')[0].text)
++        else:
++            return int(self.get('distCount')[0].text)
++
+     @property
+     @memoize
+     def hot_bricks(self):
+@@ -994,6 +1002,9 @@ class VolinfoFromGconf(object):
+     def disperse_count(self, tier, hot):
+         return gconf.get("master-disperse-count")
+
++    def distribution_count(self, tier, hot):
++        return gconf.get("master-distribution-count")
++
+     @property
+     @memoize
+     def hot_bricks(self):
+--
+1.8.3.1
+
diff --git a/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch b/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
new file mode 100644
index 0000000..62bac41
--- /dev/null
+++ b/SOURCES/0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch
@@ -0,0 +1,70 @@
+From 039a3f81209706261fc809eac94564e81a3377da Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Wed, 25 Sep 2019 14:55:19 +0530
+Subject: [PATCH 300/302] cli: remove the warning displayed when remove brick
+ start issued
+
+remove-brick start command displays the below error:
+
+It is recommended that remove-brick be run with cluster.force-migration
+option disabled to prevent possible data corruption. Doing so will ensure
+that files that receive writes during migration will not be migrated and
+will need to be manually copied after the remove-brick commit operation.
+Please check the value of the option and update accordingly.
+Do you want to continue with your current cluster.force-migration settings? (y/n)
+
+As we are not qualifying cluster.force-migration for 3.5.0,
+we should not display this message. So, removing it.
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1755227
+Change-Id: I409f2059d43c5e867788f19d2ccb8d6d839520f7
+fixes: bz#1755227
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182009
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ cli/src/cli-cmd-parser.c |  2 --
+ cli/src/cli-cmd-volume.c | 11 -----------
+ 2 files changed, 13 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 92ceb8e..4456a7b 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -2101,8 +2101,6 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+         wordcount--;
+         if (!strcmp("start", w)) {
+             command = GF_OP_CMD_START;
+-            if (question)
+-                *question = 1;
+         } else if (!strcmp("commit", w)) {
+             command = GF_OP_CMD_COMMIT;
+         } else if (!strcmp("stop", w)) {
+diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
+index a42e663..6b958bd 100644
+--- a/cli/src/cli-cmd-volume.c
++++ b/cli/src/cli-cmd-volume.c
+@@ -2088,17 +2088,6 @@ cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+             "Remove-brick force will not migrate files from the "
+             "removed bricks, so they will no longer be available"
+             " on the volume.\nDo you want to continue?";
+-    } else if (command == GF_OP_CMD_START) {
+-        question =
+-            "It is recommended that remove-brick be run with"
+-            " cluster.force-migration option disabled to prevent"
+-            " possible data corruption. Doing so will ensure that"
+-            " files that receive writes during migration will not"
+-            " be migrated and will need to be manually copied"
+-            " after the remove-brick commit operation. Please"
+-            " check the value of the option and update accordingly."
+-            " \nDo you want to continue with your current"
+-            " cluster.force-migration settings?";
+     }
+
+     if (!brick_count) {
+--
+1.8.3.1
+
diff --git a/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch b/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch
new file mode 100644
index 0000000..270a0d7
--- /dev/null
+++ b/SOURCES/0301-posix-Brick-is-going-down-unexpectedly.patch
@@ -0,0 +1,61 @@
+From 913a0dc8f1eaa2fb18a6ebd6fcf66f46b48039f1 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Wed, 18 Sep 2019 19:11:33 +0530
+Subject: [PATCH 301/302] posix: Brick is going down unexpectedly
+
+Problem: In a brick_mux environment, while multiple volumes are
+         created (1-1000), sometimes a brick goes down due to a
+         health_check thread failure.
+
+Solution: Ignore the EAGAIN error in the health_check thread code to
+          avoid the issue.
+
+> Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+> Fixes: bz#1751907
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit c4d926900dc36f71c04b3f65ceca5150ce0e8c81)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23437/)
+
+Change-Id: Id44c59f8e071a363a14d09d188813a6633855213
+BUG: 1731826
+Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182106
+Tested-by: Mohit Agrawal <moagrawa@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/storage/posix/src/posix-helpers.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
+index 6a1a35c..35dd3b6 100644
+--- a/xlators/storage/posix/src/posix-helpers.c
++++ b/xlators/storage/posix/src/posix-helpers.c
+@@ -2108,14 +2108,20 @@ out:
+     if (fd != -1) {
+         sys_close(fd);
+     }
++
+     if (ret && file_path[0]) {
+         gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED,
+                "%s() on %s returned ret is %d error is %s", op, file_path, ret,
+                ret != -1 ? strerror(ret) : strerror(op_errno));
+-        gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
+-                 "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
+-                 file_path, strerror(op_errno), priv->hostname, priv->base_path,
+-                 timeout);
++
++        if ((op_errno == EAGAIN) || (ret == EAGAIN)) {
++            ret = 0;
++        } else {
++            gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED,
++                     "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op,
++                     file_path, strerror(op_errno), priv->hostname,
++                     priv->base_path, timeout);
++        }
+     }
+     return ret;
+ }
+--
+1.8.3.1
+
diff --git a/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch b/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
new file mode 100644
index 0000000..7972767
--- /dev/null
+++ b/SOURCES/0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch
@@ -0,0 +1,67 @@
+From fb1d503791c874296afab0cd7be59b6865340d72 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <jahernan@redhat.com>
+Date: Wed, 25 Sep 2019 11:56:35 +0200
+Subject: [PATCH 302/302] cluster/ec: prevent filling shd log with "table not
+ found" messages
+
+When the self-heal daemon receives an inodelk contention notification, it
+tries to locate the related inode using inode_find() and the inode table
+owned by the top-most xlator, which in this case doesn't have any inode
+table. This causes many messages to be logged by the inode_find() function
+because the inode table passed is NULL.
+
+This patch prevents this by making sure the inode table is not NULL before
+calling inode_find().
+
+Upstream patch:
+> Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23481
+> Fixes: bz#1755344
+> Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
+
+Change-Id: I8d001bd180aaaf1521ba40a536b097fcf70c991f
+BUG: 1754790
+Signed-off-by: Xavi Hernandez <jahernan@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182207
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/ec/src/ec.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
+index 19094c4..3f31c74 100644
+--- a/xlators/cluster/ec/src/ec.c
++++ b/xlators/cluster/ec/src/ec.c
+@@ -463,6 +463,7 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+     struct gf_upcall_cache_invalidation *ci = NULL;
+     struct gf_upcall_inodelk_contention *lc = NULL;
+     inode_t *inode;
++    inode_table_t *table;
+
+     switch (upcall->event_type) {
+         case GF_UPCALL_CACHE_INVALIDATION:
+@@ -476,8 +477,18 @@ ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+                 /* The lock is not owned by EC, ignore it. */
+                 return _gf_true;
+             }
+-            inode = inode_find(((xlator_t *)ec->xl->graph->top)->itable,
+-                               upcall->gfid);
++            table = ((xlator_t *)ec->xl->graph->top)->itable;
++            if (table == NULL) {
++                /* Self-heal daemon doesn't have an inode table on the top
++                 * xlator because it doesn't need it. In this case we should
++                 * use the inode table managed by EC itself where all inodes
++                 * being healed should be present. However self-heal doesn't
++                 * use eager-locking and inodelk's are already released as
++                 * soon as possible. In this case we can safely ignore these
++                 * notifications. */
++                return _gf_false;
++            }
++            inode = inode_find(table, upcall->gfid);
+             /* If inode is not found, it means that it's already released,
+              * so we can ignore it. Probably it has been released and
+              * destroyed while the contention notification was being sent.
+--
+1.8.3.1
+
diff --git a/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch b/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
new file mode 100644
index 0000000..8641353
--- /dev/null
+++ b/SOURCES/0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch
@@ -0,0 +1,45 @@
+From ae4f538065d26a277e38810c6eef18c0312cd1f3 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Thu, 26 Sep 2019 17:52:30 +0530
+Subject: [PATCH 303/304] posix: heketidbstorage bricks go down during PVC
+ creation
+
+Problem: In an OCS environment heketidbstorage is detached because the
+         health_check thread fails. Sometimes aio_write does not finish
+         within the default health-check-timeout limit and the brick is
+         detached.
+
+Solution: To avoid the issue, increase the default timeout to 20s.
+
+> Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b
+> Fixes: bz#1755900
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+> (Cherry picked from commit c6df9e962483bac5bfcd8916318b19040387ce81)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23495/)
+
+Change-Id: Idff283d5713da571f9d20a6b296274f69c3e5b7b
+BUG: 1752713
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182387
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
+---
+ xlators/storage/posix/src/posix-common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
+index 69857d9..2cb58ba 100644
+--- a/xlators/storage/posix/src/posix-common.c
++++ b/xlators/storage/posix/src/posix-common.c
+@@ -1257,7 +1257,7 @@ struct volume_options posix_options[] = {
+     {.key = {"health-check-timeout"},
+      .type = GF_OPTION_TYPE_INT,
+      .min = 0,
+-     .default_value = "10",
++     .default_value = "20",
+      .validate = GF_OPT_VALIDATE_MIN,
+      .description =
+          "Interval in seconds to wait aio_write finish for health check, "
+--
+1.8.3.1
+
diff --git a/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch b/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch
new file mode 100644
index 0000000..5f16e0a
--- /dev/null
+++ b/SOURCES/0304-cluster-dht-Correct-fd-processing-loop.patch
@@ -0,0 +1,194 @@
+From ad233c1b3abdfe2bdfd1eacc83b5f84b7afa6b46 Mon Sep 17 00:00:00 2001
+From: N Balachandran <nbalacha@redhat.com>
+Date: Tue, 1 Oct 2019 17:37:15 +0530
+Subject: [PATCH 304/304] cluster/dht: Correct fd processing loop
+
+The fd processing loops in the
+dht_migration_complete_check_task and the
+dht_rebalance_inprogress_task functions were unsafe
+and could cause an open to be sent on an already freed
+fd. This has been fixed.
+
+> Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
+> Fixes: bz#1757399
+> Signed-off-by: N Balachandran <nbalacha@redhat.com>
+> (Cherry picked from commit 9b15867070b0cc241ab165886292ecffc3bc0aed)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23506/)
+
+Change-Id: I0a3c7d2fba314089e03dfd704f9dceb134749540
+BUG: 1756325
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/182826
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-helper.c | 84 ++++++++++++++++++++++++++----------
+ 1 file changed, 62 insertions(+), 22 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 4c57e0d..1e9fee0 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -1261,6 +1261,7 @@ dht_migration_complete_check_task(void *data)
+     fd_t *tmp = NULL;
+     uint64_t tmp_miginfo = 0;
+     dht_migrate_info_t *miginfo = NULL;
++    gf_boolean_t skip_open = _gf_false;
+     int open_failed = 0;
+
+     this = THIS;
+@@ -1399,24 +1400,34 @@ dht_migration_complete_check_task(void *data)
+      * the loop will cause the destruction of the fd. So we need to
+      * iterate the list safely because iter_fd cannot be trusted.
+      */
+-    list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
+-    {
+-        if (fd_is_anonymous(iter_fd))
+-            continue;
+-
+-        if (dht_fd_open_on_dst(this, iter_fd, dst_node))
+-            continue;
+-
++    iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
++    while (&iter_fd->inode_list != (&inode->fd_list)) {
++        if (fd_is_anonymous(iter_fd) ||
++            (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
++            if (!tmp) {
++                iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
++                                     inode_list);
++                continue;
++            }
++            skip_open = _gf_true;
++        }
+         /* We need to release the inode->lock before calling
+          * syncop_open() to avoid possible deadlocks. However this
+          * can cause the iter_fd to be released by other threads.
+          * To avoid this, we take a reference before releasing the
+          * lock.
+          */
+-        __fd_ref(iter_fd);
++        fd_ref(iter_fd);
+
+         UNLOCK(&inode->lock);
+
++        if (tmp) {
++            fd_unref(tmp);
++            tmp = NULL;
++        }
++        if (skip_open)
++            goto next;
++
+         /* flags for open are stripped down to allow following the
+          * new location of the file, otherwise we can get EEXIST or
+          * truncate the file again as rebalance is moving the data */
+@@ -1438,9 +1449,11 @@ dht_migration_complete_check_task(void *data)
+             dht_fd_ctx_set(this, iter_fd, dst_node);
+         }
+
+-        fd_unref(iter_fd);
+-
++    next:
+         LOCK(&inode->lock);
++        skip_open = _gf_false;
++        tmp = iter_fd;
++        iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+     }
+
+     SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+@@ -1453,6 +1466,10 @@ dht_migration_complete_check_task(void *data)
+
+ unlock:
+     UNLOCK(&inode->lock);
++    if (tmp) {
++        fd_unref(tmp);
++        tmp = NULL;
++    }
+
+ out:
+     if (dict) {
+@@ -1534,6 +1551,7 @@ dht_rebalance_inprogress_task(void *data)
+     int open_failed = 0;
+     uint64_t tmp_miginfo = 0;
+     dht_migrate_info_t *miginfo = NULL;
++    gf_boolean_t skip_open = _gf_false;
+
+     this = THIS;
+     frame = data;
+@@ -1654,24 +1672,40 @@ dht_rebalance_inprogress_task(void *data)
+      * the loop will cause the destruction of the fd. So we need to
+      * iterate the list safely because iter_fd cannot be trusted.
+      */
+-    list_for_each_entry_safe(iter_fd, tmp, &inode->fd_list, inode_list)
+-    {
+-        if (fd_is_anonymous(iter_fd))
+-            continue;
+-
+-        if (dht_fd_open_on_dst(this, iter_fd, dst_node))
+-            continue;
+-
++    iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
++    while (&iter_fd->inode_list != (&inode->fd_list)) {
+         /* We need to release the inode->lock before calling
+          * syncop_open() to avoid possible deadlocks. However this
+          * can cause the iter_fd to be released by other threads.
+          * To avoid this, we take a reference before releasing the
+          * lock.
+          */
+-        __fd_ref(iter_fd);
+
++        if (fd_is_anonymous(iter_fd) ||
++            (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
++            if (!tmp) {
++                iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
++                                     inode_list);
++                continue;
++            }
++            skip_open = _gf_true;
++        }
++
++        /* Yes, this is ugly but there isn't a cleaner way to do this;
++         * the fd_ref is an atomic increment so not too bad. We want to
++         * reduce the number of inode locks and unlocks.
++         */
++
++        fd_ref(iter_fd);
+         UNLOCK(&inode->lock);
+
++        if (tmp) {
++            fd_unref(tmp);
++            tmp = NULL;
++        }
++        if (skip_open)
++            goto next;
++
+         /* flags for open are stripped down to allow following the
+          * new location of the file, otherwise we can get EEXIST or
+          * truncate the file again as rebalance is moving the data */
+@@ -1692,9 +1726,11 @@ dht_rebalance_inprogress_task(void *data)
+             dht_fd_ctx_set(this, iter_fd, dst_node);
+         }
+
+-        fd_unref(iter_fd);
+-
++    next:
+         LOCK(&inode->lock);
++        skip_open = _gf_false;
++        tmp = iter_fd;
++        iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+     }
+
+     SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+
+ unlock:
+     UNLOCK(&inode->lock);
+
++    if (tmp) {
++        fd_unref(tmp);
++        tmp = NULL;
++    }
+     if (open_failed) {
+         ret = -1;
+         goto out;
+--
+1.8.3.1
+
diff --git a/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch b/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
new file mode 100644
index 0000000..a1e77c6
--- /dev/null
+++ b/SOURCES/0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch
@@ -0,0 +1,56 @@
+From 90e52f3b44da0ed05e35ebd474e284d45794b0d6 Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Thu, 10 Oct 2019 20:40:49 +0530
+Subject: [PATCH 305/307] glusterd: rebalance start should fail when quorum is
+ not met
+
+Rebalance start should not succeed if quorum is not met.
+This patch adds a condition to check whether quorum is met
+in the pre-validation stage.
+
+> fixes: bz#1760467
+> Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/23536/
+BUG: 1760261
+Change-Id: Ic7d0d08f69e4bc6d5e7abae713ec1881531c8ad4
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183146
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/glusterd/quorum-validation.t   | 2 ++
+ xlators/mgmt/glusterd/src/glusterd-mgmt.c | 3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
+index ff46729..3cc3351 100644
+--- a/tests/bugs/glusterd/quorum-validation.t
++++ b/tests/bugs/glusterd/quorum-validation.t
+@@ -34,6 +34,8 @@ TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
+ TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
+ TEST ! $CLI_1 volume set $V0 barrier enable
+
++#quorum is not met, rebalance/profile start should fail
++TEST ! $CLI_1 volume rebalance $V0 start
+ TEST ! $CLI_1 volume profile $V0 start
+
+ #bug-1690753 - Volume stop when quorum not met is successful
+diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+index ec78913..a4915f3 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+@@ -1059,7 +1059,8 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+         goto out;
+     }
+
+-    if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME) {
++    if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
++        op == GD_OP_REBALANCE) {
+         ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
+         if (ret) {
+             gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+--
+1.8.3.1
+
diff --git a/SOURCES/0306-cli-fix-distCount-value.patch b/SOURCES/0306-cli-fix-distCount-value.patch
new file mode 100644
index 0000000..0e8b9f2
--- /dev/null
+++ b/SOURCES/0306-cli-fix-distCount-value.patch
@@ -0,0 +1,43 @@
+From 167980565e1ab56989b25fe6aa0203aeb7970c8b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Sun, 6 Oct 2019 19:05:28 +0530
+Subject: [PATCH 306/307] cli: fix distCount value
+
+gluster volume info --xml is displaying a wrong distCount
+value. This patch addresses it.
+
+> fixes: bz#1758878
+> Change-Id: I64081597e06018361e6524587b433b0c4b2a0260
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/23521/
+
+BUG: 1758618
+Change-Id: I64081597e06018361e6524587b433b0c4b2a0260
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183147
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-xml-output.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
+index b417bb8..006e2fb 100644
+--- a/cli/src/cli-xml-output.c
++++ b/cli/src/cli-xml-output.c
+@@ -2548,8 +2548,9 @@ cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
+         ret = dict_get_int32(dict, key, &dist_count);
+         if (ret)
+             goto out;
+-        ret = xmlTextWriterWriteFormatElement(
+-            local->writer, (xmlChar *)"distCount", "%d", dist_count);
++        ret = xmlTextWriterWriteFormatElement(local->writer,
++                                              (xmlChar *)"distCount", "%d",
++                                              (brick_count / dist_count));
+         XML_RET_CHECK_AND_GOTO(ret, out);
+
+         snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+--
+1.8.3.1
+
diff --git a/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch b/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch
new file mode 100644
index 0000000..7a85b50
--- /dev/null
+++ b/SOURCES/0307-ssl-fix-RHEL8-regression-failure.patch
@@ -0,0 +1,42 @@
+From be9695391f39fe6eb1d157f6bfd018116d1ee42b Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Mon, 30 Sep 2019 13:14:06 +0530
+Subject: [PATCH 307/307] ssl: fix RHEL8 regression failure
+
+This test is failing with
+"SSL routines:SSL_CTX_use_certificate:ee key too small"
+in RHEL8. This change is made according to
+https://access.redhat.com/solutions/4157431
+
+> updates: bz#1756900
+> Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5
+> Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+
+upstream patch: https://review.gluster.org/#/c/glusterfs/+/23501/
+
+BUG: 1704562
+Change-Id: Ib436372c3bd94bcf7324976337add7da4088b3d5
+Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183148
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bugs/cli/bug-1320388.t | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
+index f5ffcbe..8e5d77b 100755
+--- a/tests/bugs/cli/bug-1320388.t
++++ b/tests/bugs/cli/bug-1320388.t
+@@ -21,7 +21,7 @@ cleanup;
+ rm -f $SSL_BASE/glusterfs.*
+ touch "$GLUSTERD_WORKDIR"/secure-access
+
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 3072
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch b/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
new file mode 100644
index 0000000..adbeb43
--- /dev/null
+++ b/SOURCES/0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch
@@ -0,0 +1,347 @@
+From 27f799563c1c2c1986662ed4a3a83d834c04fd98 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Mon, 14 Oct 2019 15:42:31 +0530
+Subject: [PATCH 308/308] dht: Rebalance causing IO Error - File descriptor in
+ bad state
+
+Problem: When a file is migrated, dht attempts to re-open all open
+    fds on the new cached subvol. Earlier, if dht had not opened the fd,
+    the client xlator would be unable to find the remote fd and would
+    fall back to using an anon fd for the fop. That behavior changed with
+    https://review.gluster.org/#/c/glusterfs/+/15804, causing fops to fail
+    with EBADFD if the fd was not available on the cached subvol.
+    The client xlator returns EBADFD if the remote fd is not found but
+    dht only checks for EBADF before re-opening fds on the new cached subvol.
+
+Solution: Handle EBADFD in the dht code path to avoid the issue.
+
+> Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8
+> Fixes: bz#1758579
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit 9314a9fbf487614c736cf6c4c1b93078d37bb9df)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23518/)
+
+Change-Id: I43c51995cdd48d05b12e4b2889c8dbe2bb2a72d8
+BUG: 1758432
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183370
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-common.c      | 27 +++++++++++++++---
+ xlators/cluster/dht/src/dht-common.h      | 19 ++++++++++++++
+ xlators/cluster/dht/src/dht-helper.c      | 29 +++++++++++++++++++++
+ xlators/cluster/dht/src/dht-inode-read.c  | 42 +++++++++++++++++++++++++++----
+ xlators/cluster/dht/src/dht-inode-write.c | 16 ++++++------
+ 5 files changed, 116 insertions(+), 17 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
+index 99cccd6..37952ba 100644
+--- a/xlators/cluster/dht/src/dht-common.c
++++ b/xlators/cluster/dht/src/dht-common.c
+@@ -53,6 +53,17 @@ dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+ int
+ dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
++/* Check the xdata to make sure EBADF has been set by client xlator */
++int32_t
++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
++{
++    if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
++        !(local->fd_checked)) {
++        return 1;
++    }
++    return 0;
++}
++
+ /* Sets the blocks and size values to fixed values. This is to be called
+  * only for dirs. The caller is responsible for checking the type
+  */
+@@ -4529,6 +4540,7 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     int this_call_cnt = 0;
+     dht_local_t *local = NULL;
+     dht_conf_t *conf = NULL;
++    int ret = 0;
+
+     VALIDATE_OR_GOTO(frame, err);
+     VALIDATE_OR_GOTO(frame->local, err);
+@@ -4537,6 +4549,13 @@ dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     conf = this->private;
+     local = frame->local;
+
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++        ret = dht_check_and_open_fd_on_subvol(this, frame);
++        if (ret)
++            goto err;
++        return 0;
++    }
++
+     LOCK(&frame->lock);
+     {
+         if (!xattr || (op_ret == -1)) {
+@@ -5204,8 +5223,8 @@ dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+     local->op_errno = op_errno;
+
+-    if ((local->fop == GF_FOP_FSETXATTR) && op_ret == -1 &&
+-        (op_errno == EBADF) && !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FSETXATTR) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -5929,8 +5948,8 @@ dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+
+     local->op_errno = op_errno;
+
+-    if ((local->fop == GF_FOP_FREMOVEXATTR) && (op_ret == -1) &&
+-        (op_errno == EBADF) && !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FREMOVEXATTR) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index c516271..ce11f02 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1230,6 +1230,22 @@ dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                 struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+
+ int
++dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                 int32_t op_ret, int32_t op_errno, dict_t *xdata);
++
++int
++dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
++                 int op_errno, dict_t *xattr, dict_t *xdata);
++
++int
++dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                       int32_t op_ret, int32_t op_errno, dict_t *dict,
++                       dict_t *xdata);
++int
++dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++                 int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
++
++int
+ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
+
+ void
+@@ -1525,4 +1541,7 @@ int
+ dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+               dict_t *xdata);
+
++int32_t
++dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
++
+ #endif /* _DHT_H */
+diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
+index 1e9fee0..4f7370d 100644
+--- a/xlators/cluster/dht/src/dht-helper.c
++++ b/xlators/cluster/dht/src/dht-helper.c
+@@ -366,6 +366,23 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
+
+             break;
+
++        case GF_FOP_FXATTROP:
++            STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
++                       subvol->fops->fxattrop, local->fd,
++                       local->rebalance.flags, local->rebalance.xattr,
++                       local->xattr_req);
++            break;
++
++        case GF_FOP_FGETXATTR:
++            STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
++                       local->fd, local->key, NULL);
++            break;
++
++        case GF_FOP_FINODELK:
++            STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
++                       local->key, local->fd, local->rebalance.lock_cmd,
++                       &local->rebalance.flock, local->xattr_req);
++            break;
+         default:
+             gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
+                    "Unknown FOP on fd (%p) on file %s @ %s", fd,
+@@ -429,6 +446,18 @@ handle_err:
+             DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+             break;
+
++        case GF_FOP_FXATTROP:
++            DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
++            break;
++
++        case GF_FOP_FGETXATTR:
++            DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
++            break;
++
++        case GF_FOP_FINODELK:
++            DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
++            break;
++
+         default:
+             gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP,
+                    "Unknown FOP on fd (%p) on file %s @ %s", fd,
+diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
+index cacfe35..0c209a5 100644
+--- a/xlators/cluster/dht/src/dht-inode-read.c
++++ b/xlators/cluster/dht/src/dht-inode-read.c
+@@ -162,8 +162,8 @@ dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     local = frame->local;
+     prev = cookie;
+
+-    if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1) && (op_errno == EBADF) &&
+-        !(local->fd_checked)) {
++    if ((local->fop == GF_FOP_FSTAT) &&
++        dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -431,7 +431,7 @@ dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     if (local->call_cnt != 1)
+         goto out;
+
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -703,7 +703,7 @@ dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+     if (local->call_cnt != 1)
+         goto out;
+
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -820,7 +820,7 @@ dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+
+     local->op_errno = op_errno;
+
+-    if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+         ret = dht_check_and_open_fd_on_subvol(this, frame);
+         if (ret)
+             goto out;
+@@ -1223,6 +1223,13 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+     if (local->call_cnt != 1)
+         goto out;
+
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++        ret = dht_check_and_open_fd_on_subvol(this, frame);
++        if (ret)
++            goto out;
++        return 0;
++    }
++
+     ret = dht_read_iatt_from_xdata(this, xdata, &stbuf);
+
+     if ((!op_ret) && (ret)) {
+@@ -1535,8 +1542,26 @@ dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+ {
++    dht_local_t *local = NULL;
++    int ret = 0;
++
++    GF_VALIDATE_OR_GOTO("dht", frame, out);
++    GF_VALIDATE_OR_GOTO("dht", this, out);
++    GF_VALIDATE_OR_GOTO("dht", frame->local, out);
++
++    local = frame->local;
++
++    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
++        ret = dht_check_and_open_fd_on_subvol(this, frame);
++        if (ret)
++            goto out;
++        return 0;
++    }
++
++out:
+     dht_lk_inode_unref(frame, op_ret);
+     DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
++
+ return 0; + } + +@@ -1574,6 +1599,13 @@ dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + if (ret) + goto err; + */ ++ local->rebalance.flock = *lock; ++ local->rebalance.lock_cmd = cmd; ++ local->key = gf_strdup(volume); ++ ++ if (xdata) ++ local->xattr_req = dict_ref(xdata); ++ + STACK_WIND(frame, dht_finodelk_cbk, lock_subvol, + lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata); + +diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c +index b26b705..b6b349d 100644 +--- a/xlators/cluster/dht/src/dht-inode-write.c ++++ b/xlators/cluster/dht/src/dht-inode-write.c +@@ -49,7 +49,7 @@ dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * We only check once as this could be a valid bad fd error. + */ + +- if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -262,8 +262,8 @@ dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * We only check once as this could actually be a valid error. + */ + +- if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1) && +- ((op_errno == EBADF) || (op_errno == EINVAL)) && !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FTRUNCATE) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -489,7 +489,7 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * We only check once as this could actually be a valid error. + */ + +- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -666,7 +666,7 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. + */ +- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -838,7 +838,7 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + * and a lookup updated the cached subvol in the inode ctx. + * We only check once as this could actually be a valid error. 
+ */ +- if ((op_ret == -1) && (op_errno == EBADF) && !(local->fd_checked)) { ++ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +@@ -1005,8 +1005,8 @@ dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + local->op_errno = op_errno; + +- if ((local->fop == GF_FOP_FSETATTR) && (op_ret == -1) && +- (op_errno == EBADF) && !(local->fd_checked)) { ++ if ((local->fop == GF_FOP_FSETATTR) && ++ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) { + ret = dht_check_and_open_fd_on_subvol(this, frame); + if (ret) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch b/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch new file mode 100644 index 0000000..6ae359e --- /dev/null +++ b/SOURCES/0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch @@ -0,0 +1,240 @@ +From 2b1738402276f43d7cb64542b74cb50145e46d77 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Wed, 16 Oct 2019 14:25:47 +0530 +Subject: [PATCH 309/309] geo-rep: Fix config upgrade on non-participating node + +After upgrade, if the config files are of old format, it +gets migrated to new format. Monitor process migrates it. +Since monitor doesn't run on nodes where bricks are not +hosted, it doesn't get migrated there. So this patch fixes +the config upgrade on nodes which doesn't host bricks. +This happens during config either on get/set/reset. + +Backport of: + > Patch: https://review.gluster.org/23555 + > Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + > fixes: bz#1762220 + +Change-Id: Ibade2f2310b0f3affea21a3baa1ae0eb71162cba +Signed-off-by: Kotresh HR <khiremat@redhat.com> +BUG: 1760939 +Reviewed-on: https://code.engineering.redhat.com/gerrit/183461 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/gsyncd.py | 3 +- + tests/00-geo-rep/georep-config-upgrade.t | 132 +++++++++++++++++++++++++++++++ + tests/00-geo-rep/gsyncd.conf.old | 47 +++++++++++ + 3 files changed, 181 insertions(+), 1 deletion(-) + create mode 100644 tests/00-geo-rep/georep-config-upgrade.t + create mode 100644 tests/00-geo-rep/gsyncd.conf.old + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 6ae5269..7b48d82 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -255,7 +255,8 @@ def main(): + if args.subcmd == "slave": + override_from_args = True + +- if args.subcmd == "monitor": ++ if config_file is not None and \ ++ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]: + ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"]) + if ret is not None: + gconf.config_upgrade(config_file, ret) +diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t +new file mode 100644 +index 0000000..557461c +--- /dev/null ++++ b/tests/00-geo-rep/georep-config-upgrade.t +@@ -0,0 +1,132 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. 
$(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=300 ++OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old ++WORKING_DIR=/var/lib/glusterd/geo-replication/master_127.0.0.1_slave ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++##Variables ++GEOREP_CLI="$CLI volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++############################################################ ++#SETUP VOLUMES AND GEO-REPLICATION ++############################################################ ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Create, start and mount meta_volume ++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; ++TEST $CLI volume start $META_VOL ++TEST mkdir -p $META_MNT ++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++############################################################ ++#BASIC GEO-REPLICATION TESTS ++############################################################ ++ ++#Create geo-rep session ++TEST create_georep_session $master $slave ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Enable_metavolume ++TEST $GEOREP_CLI $master $slave config use_meta_volume true ++ ++#Wait for common secret pem file to be created ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file ++ ++#Verify the keys are distributed ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed ++ ++#Start_georep ++TEST $GEOREP_CLI $master $slave start ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active" ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive" ++ ++TEST $GEOREP_CLI $master $slave config sync-method tarssh ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Copy old config file ++mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/gsyncd.conf.org ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++ ++#Check if config get all updates config_file ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if config get updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config sync-method ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if config set updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config sync-xattrs false ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if config reset updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! 
grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave config \!sync-xattrs ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Check if geo-rep start updates config_file ++rm -f $WORKING_DIR/gsyncd.conf ++cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf ++TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf ++TEST $GEOREP_CLI $master $slave start ++TEST grep "sync-method" $WORKING_DIR/gsyncd.conf ++ ++#Stop geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave delete ++ ++#Cleanup authorized keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old +new file mode 100644 +index 0000000..519acaf +--- /dev/null ++++ b/tests/00-geo-rep/gsyncd.conf.old +@@ -0,0 +1,47 @@ ++[__meta__] ++version = 2.0 ++ ++[peersrx . .] ++remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd ++georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/ ++ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem ++changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log ++working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave} ++ignore_deletes = false ++pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid ++state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status ++gluster_command_dir = /usr/local/sbin/ ++gluster_params = aux-gfid-mount acl ++ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem ++state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status ++state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket ++socketdir = /var/run/gluster ++log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log ++gluster_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log ++special_sync_mode = partial ++change_detector = changelog ++pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid ++state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status ++ ++[__section_order__] ++peersrx . . = 0 ++peersrx . %5essh%3a = 2 ++peersrx . = 3 ++peers master slave = 4 ++ ++[peersrx . %5Essh%3A] ++remote_gsyncd = /nonexistent/gsyncd ++ ++[peersrx .] 
++gluster_command_dir = /usr/local/sbin/ ++gluster_params = aux-gfid-mount acl ++log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log ++log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log ++gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log ++ ++[peers master slave] ++session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214 ++master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime ++volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214 ++use_tarssh = true ++ +-- +1.8.3.1 + diff --git a/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch b/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch new file mode 100644 index 0000000..a38a4aa --- /dev/null +++ b/SOURCES/0310-tests-test-case-for-non-root-geo-rep-setup.patch @@ -0,0 +1,284 @@ +From c2decfb59bd1be7cd2b0d792fd2ca2627913638a Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Tue, 24 Sep 2019 18:22:13 +0530 +Subject: [PATCH 310/313] tests : test case for non-root geo-rep setup + +Added test case for non-root geo-rep setup. + +Backport of: + > Patch: https://review.gluster.org/22902 + > Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98 + > fixes: bz#1717827 + > Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +Change-Id: Ib6ebee79949a9f61bdc5c7b5e11b51b262750e98 +BUG: 1763412 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/183664 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/00-geo-rep/00-georep-verify-non-root-setup.t | 251 +++++++++++++++++++++ + 1 file changed, 251 insertions(+) + create mode 100644 tests/00-geo-rep/00-georep-verify-non-root-setup.t + +diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +new file mode 100644 +index 0000000..e753c1f +--- /dev/null ++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +@@ -0,0 +1,251 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. 
$(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=500 ++ ++### Basic Non-root geo-rep setup test with Distribute Replicate volumes ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++ ++##Variables ++GEOREP_CLI="$CLI volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++##User and group to be used for non-root geo-rep setup ++usr="nroot" ++grp="ggroup" ++ ++slave_url=$usr@$slave ++slave_vol=$GSV0 ++ssh_url=$usr@$SH0 ++ ++############################################################ ++#SETUP VOLUMES AND VARIABLES ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Mount master ++#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++#TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++ ++########################################################## ++#TEST FUNCTIONS ++ ++function distribute_key_non_root() ++{ ++ ${GLUSTER_LIBEXECDIR}/set_geo_rep_pem_keys.sh $usr $master $slave_vol ++ echo $? ++} ++ ++ ++function check_status_non_root() ++{ ++ local search_key=$1 ++ $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l ++} ++ ++ ++function check_and_clean_group() ++{ ++ if [ $(getent group $grp) ] ++ then ++ groupdel $grp; ++ echo $? ++ else ++ echo 0 ++ fi ++} ++ ++function clean_lock_files() ++{ ++ if [ ! -f /etc/passwd.lock ]; ++ then ++ rm -rf /etc/passwd.lock; ++ fi ++ ++ if [ ! -f /etc/group.lock ]; ++ then ++ rm -rf /etc/group.lock; ++ fi ++ ++ if [ ! -f /etc/shadow.lock ]; ++ then ++ rm -rf /etc/shadow.lock; ++ fi ++ ++ if [ ! 
-f /etc/gshadow.lock ]; ++ then ++ rm -rf /etc/gshadow.lock; ++ fi ++} ++ ++ ++########################################################### ++#SETUP NON-ROOT GEO REPLICATION ++ ++##Create ggroup group ++##First test if group exists and then create new one ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group ++ ++##cleanup *.lock files ++ ++clean_lock_files ++ ++TEST /usr/sbin/groupadd $grp ++ ++clean_lock_files ++##Create non-root user and assign it to newly created group ++ ++TEST /usr/sbin/useradd -G $grp $usr ++ ++##Modify password for non-root user to have control over distributing ssh-key ++echo "$usr:pass" | chpasswd ++ ++##Set up mountbroker root ++TEST gluster-mountbroker setup /var/mountbroker-root $grp ++ ++##Associate volume and non-root user to the mountbroker ++TEST gluster-mountbroker add $slave_vol $usr ++ ++##Check ssh setting for clear text passwords ++sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart ++ ++ ++##Restart glusterd to reflect mountbroker changages ++TEST killall_gluster; ++TEST glusterd; ++TEST pidof glusterd; ++ ++ ++ ++##Create, start and mount meta_volume ++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; ++TEST $CLI volume start $META_VOL ++TEST mkdir -p $META_MNT ++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++## Check status of mount-broker ++TEST gluster-mountbroker status ++ ++ ++##Setup password-less ssh for non-root user ++#sshpass -p "pass" ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url ++##Run ssh agent ++eval "$(ssh-agent -s)" ++PASS="pass" ++ ++ ++##Create a temp script to echo the SSH password, used by SSH_ASKPASS ++ ++SSH_ASKPASS_SCRIPT=/tmp/ssh-askpass-script ++cat > ${SSH_ASKPASS_SCRIPT} <<EOL ++#!/bin/bash ++echo "${PASS}" ++EOL ++chmod u+x ${SSH_ASKPASS_SCRIPT} ++ ++##set no display, necessary for ssh to use with setsid and SSH_ASKPASS ++#export DISPLAY=:0 ++ ++export SSH_ASKPASS=${SSH_ASKPASS_SCRIPT} ++ ++DISPLAY=: setsid ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url ++ ++##Setting up PATH for gluster binaries in case of source installation ++##ssh -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $ssh_url "echo "export PATH=$PATH:/usr/local/sbin" >> ~/.bashrc" ++ ++##Creating secret pem pub file ++TEST gluster-georep-sshkey generate ++ ++##Create geo-rep non-root setup ++ ++TEST $GEOREP_CLI $master $slave_url create push-pem ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++#Config gluster-command-dir ++TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR} ++ ++## Test for key distribution ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 distribute_key_non_root ++ ++##Wait for common secret pem file to be created ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file ++ ++#Enable_metavolume ++TEST $GEOREP_CLI $master $slave config use_meta_volume true ++ ++#Start_georep ++TEST $GEOREP_CLI $master $slave_url start ++ ++## Meta volume is enabled so looking for 2 Active and 2 Passive sessions ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Active" ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Passive" ++ ++#Pause geo-replication session ++TEST $GEOREP_CLI $master $slave_url pause ++ ++#Resume geo-replication session ++TEST $GEOREP_CLI $master 
$slave_url resume ++ ++#Validate failure of volume stop when geo-rep is running ++TEST ! $CLI volume stop $GMV0 ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave_url stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave_url delete ++ ++#Cleanup authorized_keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++#clear mountbroker ++gluster-mountbroker remove --user $usr ++gluster-mountbroker remove --volume $slave_vol ++ ++#delete group and user created for non-root setup ++TEST userdel -r -f $usr ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group ++ ++##password script cleanup ++rm -rf /tmp/ssh-askpass-script ++ ++ ++cleanup; ++ +-- +1.8.3.1 + diff --git a/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch b/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch new file mode 100644 index 0000000..af0206a --- /dev/null +++ b/SOURCES/0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch @@ -0,0 +1,186 @@ +From 4a2441e76f4240568093080769ede07bb7fb2016 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Sun, 20 Oct 2019 01:01:39 +0530 +Subject: [PATCH 311/313] geo-rep: Fix Permission denied traceback on non root + setup + +Problem: +While syncing rename of directory in hybrid crawl, geo-rep +crashes as below. + +Traceback (most recent call last): + File "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker + res = getattr(self.obj, rmeth)(*in_data[2:]) + File "/usr/local/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops + src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) + File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 687, in get_slv_dir_path + [ENOENT], [ESTALE]) + File "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 546, in errno_wrap + return call(*arg) +PermissionError: [Errno 13] Permission denied: '/bricks/brick1/b1/.glusterfs/8e/c0/8ec0fcd4-d50f-4a6e-b473-a7943ab66640' + +Cause: +Conversion of gfid to path for a directory uses readlink on backend +.glusterfs gfid path. But this fails for non root user with +permission denied. + +Fix: +Use gfid2path interface to get the path from gfid + +Backport of: + > Patch: https://review.gluster.org/23570 + > Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222 + > fixes: bz#1763439 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +Change-Id: I9d40c713a1b32cea95144cbc0f384ada82972222 +BUG: 1763412 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/183665 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/gsyncd.py | 3 +- + geo-replication/syncdaemon/syncdutils.py | 35 ++++++++++++++++------ + tests/00-geo-rep/00-georep-verify-non-root-setup.t | 30 +++++++++++++++---- + 3 files changed, 52 insertions(+), 16 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 7b48d82..8940384 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -231,7 +231,8 @@ def main(): + # Set default path for config file in that case + # If an subcmd accepts config file then it also accepts + # master and Slave arguments. 
+- if config_file is None and hasattr(args, "config_file"): ++ if config_file is None and hasattr(args, "config_file") \ ++ and args.subcmd != "slave": + config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % ( + GLUSTERD_WORKDIR, + args.master, +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index aadaebd..b08098e 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -57,6 +57,7 @@ from hashlib import sha256 as sha256 + + # auxiliary gfid based access prefix + _CL_AUX_GFID_PFX = ".gfid/" ++ROOT_GFID = "00000000-0000-0000-0000-000000000001" + GF_OP_RETRIES = 10 + + GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0' +@@ -670,6 +671,7 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + global slv_bricks + + dir_path = ENOENT ++ pfx = gauxpfx() + + if not slv_bricks: + slv_info = Volinfo(slv_volume, slv_host, master=False) +@@ -683,15 +685,30 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + gfid[2:4], + gfid], [ENOENT], [ESTALE]) + if dir_path != ENOENT: +- realpath = errno_wrap(os.readlink, [dir_path], +- [ENOENT], [ESTALE]) +- if not isinstance(realpath, int): +- realpath_parts = realpath.split('/') +- pargfid = realpath_parts[-2] +- basename = realpath_parts[-1] +- pfx = gauxpfx() +- dir_entry = os.path.join(pfx, pargfid, basename) +- return dir_entry ++ try: ++ realpath = errno_wrap(os.readlink, [dir_path], ++ [ENOENT], [ESTALE]) ++ if not isinstance(realpath, int): ++ realpath_parts = realpath.split('/') ++ pargfid = realpath_parts[-2] ++ basename = realpath_parts[-1] ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry ++ except OSError: ++ # .gfid/GFID ++ gfidpath = unescape_space_newline(os.path.join(pfx, gfid)) ++ realpath = errno_wrap(Xattr.lgetxattr_buf, ++ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE]) ++ if not isinstance(realpath, int): ++ basename = os.path.basename(realpath).rstrip('\x00') ++ dirpath = os.path.dirname(realpath) ++ if dirpath is "/": ++ pargfid = ROOT_GFID ++ else: ++ dirpath = dirpath.strip("/") ++ pargfid = get_gfid_from_mnt(dirpath) ++ dir_entry = os.path.join(pfx, pargfid, basename) ++ return dir_entry + + return None + +diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +index e753c1f..c9fd8b2 100644 +--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t ++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +@@ -118,8 +118,8 @@ clean_lock_files + TEST /usr/sbin/groupadd $grp + + clean_lock_files +-##Create non-root user and assign it to newly created group +- ++##Del if exists and create non-root user and assign it to newly created group ++userdel -r -f $usr + TEST /usr/sbin/useradd -G $grp $usr + + ##Modify password for non-root user to have control over distributing ssh-key +@@ -140,8 +140,6 @@ TEST killall_gluster; + TEST glusterd; + TEST pidof glusterd; + +- +- + ##Create, start and mount meta_volume + TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; + TEST $CLI volume start $META_VOL +@@ -225,6 +223,26 @@ TEST $GEOREP_CLI $master $slave_url resume + #Validate failure of volume stop when geo-rep is running + TEST ! 
$CLI volume stop $GMV0 + ++#Hybrid directory rename test BZ#1763439 ++TEST $GEOREP_CLI $master $slave_url config change_detector xsync ++mkdir ${master_mnt}/dir1 ++mkdir ${master_mnt}/dir1/dir2 ++mkdir ${master_mnt}/dir1/dir3 ++mkdir ${master_mnt}/hybrid_d1 ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir2 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/dir3 ++ ++mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1 ++mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2 ++mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3 ++ ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2 ++EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3 ++ + #Stop Geo-rep + TEST $GEOREP_CLI $master $slave_url stop + +@@ -232,8 +250,8 @@ TEST $GEOREP_CLI $master $slave_url stop + TEST $GEOREP_CLI $master $slave_url delete + + #Cleanup authorized_keys +-sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys +-sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys + + #clear mountbroker + gluster-mountbroker remove --user $usr +-- +1.8.3.1 + diff --git a/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch b/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch new file mode 100644 index 0000000..bf8c820 --- /dev/null +++ b/SOURCES/0312-Scripts-quota_fsck-script-KeyError-contri_size.patch @@ -0,0 +1,59 @@ +From b1d8a5ee8b2e320aaaf9b2a145fbc285178d07bb Mon Sep 17 00:00:00 2001 +From: hari gowtham <hgowtham@redhat.com> +Date: Tue, 22 Oct 2019 15:11:03 +0530 +Subject: [PATCH 312/313] Scripts: quota_fsck script KeyError: 'contri_size' + + back-port of: https://review.gluster.org/#/c/glusterfs/+/23586/ + +Problem: In a certain code flow, we weren't handling the +unavailability of the contri value in the dict. Trying to print +without the value resulted in erroring out. 
+
+Fix: Print the whole dictionary, as the values will be
+helpful in understanding the state of the file/dir
+
+>Fixes: bz#1764129
+>Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725
+>Signed-off-by: hari gowtham <hgowtham@redhat.com>
+
+BUG: 1719171
+Change-Id: I99c538adb712f281ca10e4e0088f404f515b9725
+Signed-off-by: hari gowtham <hgowtham@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183720
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/quota/quota_fsck.py | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
+index f03895d..485a37a 100755
+--- a/extras/quota/quota_fsck.py
++++ b/extras/quota/quota_fsck.py
+@@ -52,17 +52,17 @@ epilog_msg='''
+
+ def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+     if log_type == QUOTA_VERBOSE:
+-        print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path, xattr_dict, stbuf})
++        print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
+     elif log_type == QUOTA_META_ABSENT:
+-        print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict})
++        print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
+     elif log_type == QUOTA_SIZE_MISMATCH:
+         print("mismatch")
+         if dir_size is not None:
+-            print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'],
+-                   dir_size})
++            print('%24s %60s %12s %12s' % ("Size Mismatch", path,
++                   xattr_dict, dir_size))
+         else:
+-            print('%-24s %-60s %-12i %-12i' % {"Size Mismatch", path, xattr_dict['contri_size'],
+-                   stbuf.st_size})
++            print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict,
++                   stbuf.st_size))
+
+ def size_differs_lot(s1, s2):
+     '''
+--
+1.8.3.1
+
diff --git a/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch b/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
new file mode 100644
index 0000000..e4887b8
--- /dev/null
+++ b/SOURCES/0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch
@@ -0,0 +1,60 @@
+From 23091d24d34102c7938ae2890930b73c89c5a8e7 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawal@redhat.com>
+Date: Tue, 22 Oct 2019 18:52:25 +0530
+Subject: [PATCH 313/313] extras: Cgroup(CPU/Mem) restriction are not working
+ on gluster process
+
+Problem: After configuring Cgroup (CPU/MEM) limits for a gluster process,
+         the resource (CPU/MEM) limits are not applied to the gluster
+         process. Cgroup limits are not applied because not all threads are
+         moved into the newly created cgroup to apply the restriction.
+
+Solution: To move the gluster threads to the newly created cgroup, change
+          the condition in the script
+
+> Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c
+> Fixes: bz#1764208
+> (Cherry pick from commit 38de02012948013a88597545cf49380ce97f6fa7)
+> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23599/)
+> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
+
+Change-Id: I8ad81c69200e4ec43a74f6052481551cf835354c
+BUG: 1764202
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183730
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/control-cpu-load.sh | 2 +-
+ extras/control-mem.sh      | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
+index b739c82..52dcf62 100755
+--- a/extras/control-cpu-load.sh
++++ b/extras/control-cpu-load.sh
+@@ -104,7 +104,7 @@ echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
+ echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
+
+ if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+-  for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
++  for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+     do
+       echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+     done
+diff --git a/extras/control-mem.sh b/extras/control-mem.sh
+index 38aa2a0..91b36f8 100755
+--- a/extras/control-mem.sh
++++ b/extras/control-mem.sh
+@@ -116,7 +116,7 @@ else
+ fi
+
+ if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+-  for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`;
++  for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+     do
+       echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+     done
+--
+1.8.3.1
+
diff --git a/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
new file mode 100644
index 0000000..adde426
--- /dev/null
+++ b/SOURCES/0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch
@@ -0,0 +1,38 @@
+From 2a4f19df70276ba41db19938507297f7580286fa Mon Sep 17 00:00:00 2001
+From: Atin Mukherjee <amukherj@redhat.com>
+Date: Fri, 25 Oct 2019 18:07:27 +0530
+Subject: [PATCH 314/314] glusterd/tier: is_tier_enabled inserted causing
+ checksum mismatch
+
+The volfile entry is_tier_enabled is checked for version 3.7.6 while it was
+supposed to be checked for 3.10. This is fixed downstream only, by changing
+the version of the check to 3.13.1
+
+Label: DOWNSTREAM ONLY
+BUG: 1765555
+Change-Id: Id631f3ba520b3e7b126c7607dca1bb7874532e81
+Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/183932
+Reviewed-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: Sanju Rakonde <srakonde@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-store.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
+index 4889217..8a10eb8 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-store.c
++++ b/xlators/mgmt/glusterd/src/glusterd-store.c
+@@ -1036,7 +1036,7 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
+         if (ret)
+             goto out;
+     }
+-    if (conf->op_version >= GD_OP_VERSION_3_10_0) {
++    if (conf->op_version >= GD_OP_VERSION_3_13_1) {
+         snprintf(buf, sizeof(buf), "%d", volinfo->is_tier_enabled);
+         ret = gf_store_save_value(fd, GF_TIER_ENABLED, buf);
+         if (ret)
+--
+1.8.3.1
+
diff --git a/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
new file mode 100644
index 0000000..a0448cc
--- /dev/null
+++ b/SOURCES/0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch
@@ -0,0 +1,52 @@
+From 4a04e1b5540921db22f1894f71eb30342127192d Mon Sep 17 00:00:00 2001
+From: Kotresh HR <khiremat@redhat.com>
+Date: Tue, 12 Nov 2019 21:53:20 +0530
+Subject: [PATCH 315/316] geo-rep: Fix py2/py3 compatibility in repce
+
+Geo-rep fails to start on a python2-only machine like
+centos6. It fails with "ImportError no module named _io".
+This patch fixes the same.
+
+Backport of:
+ > Patch: https://review.gluster.org/23702
+ > fixes: bz#1771577
+ > Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec
+ > Signed-off-by: Kotresh HR <khiremat@redhat.com>
+
+BUG: 1771524
+Change-Id: I8228458a853a230546f9faf29a0e9e0f23b3efec
+Signed-off-by: Kotresh HR <khiremat@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/185377
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunny Kumar <sunkumar@redhat.com>
+---
+ geo-replication/syncdaemon/repce.py | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
+index 6065b82..c622afa 100644
+--- a/geo-replication/syncdaemon/repce.py
++++ b/geo-replication/syncdaemon/repce.py
+@@ -8,7 +8,6 @@
+ # cases as published by the Free Software Foundation.
+ #
+
+-import _io
+ import os
+ import sys
+ import time
+@@ -58,9 +57,9 @@ def recv(inf):
+     """load an object from input stream
+     python2 and python3 compatibility, inf is sys.stdin
+     and is opened as text stream by default.
Hence using the +- buffer attribute ++ buffer attribute in python3 + """ +- if isinstance(inf, _io.TextIOWrapper): ++ if hasattr(inf, "buffer"): + return pickle.load(inf.buffer) + else: + return pickle.load(inf) +-- +1.8.3.1 + diff --git a/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch new file mode 100644 index 0000000..c2045a0 --- /dev/null +++ b/SOURCES/0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch @@ -0,0 +1,51 @@ +From b9a19aef5de94eb91162448ad687f2d2d194f82c Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 14 Nov 2019 09:55:15 +0000 +Subject: [PATCH 316/316] spec: fixed python-prettytable dependency for rhel6 + +Installing glusterfs on rhel6 was failing with python-prettytable +dependency as it required python2-prettytable for glusterfs-events. +This patch conditionally sets the python version for rhel7 and +fixes the problem. + +Label: DOWNSTREAM ONLY + +BUG: 1771614 + +Change-Id: I6288daa5d8c2d82a6d73a0d9722786a2a99b9db5 +fixes: bz#1771614 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185385 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 3c2e2dc..eeadb65 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -706,7 +706,7 @@ This package provides the translators needed on any GlusterFS client. + %package events + Summary: GlusterFS Events + Requires: %{name}-server%{?_isa} = %{version}-%{release} +-Requires: python%{_pythonver} python%{_pythonver}-prettytable ++Requires: python%{_pythonver} + Requires: python%{_pythonver}-gluster = %{version}-%{release} + %if ( 0%{?rhel} && 0%{?rhel} < 8 ) + Requires: python-requests +@@ -714,7 +714,10 @@ Requires: python-requests + Requires: python%{_pythonver}-requests + %endif + %if ( 0%{?rhel} && 0%{?rhel} < 7 ) ++Requires: python-prettytable + Requires: python-argparse ++%else ++Requires: python%{_pythonver}-prettytable + %endif + %if ( 0%{?_with_systemd:1} ) + %{?systemd_requires} +-- +1.8.3.1 + diff --git a/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch b/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..eccf2e3 --- /dev/null +++ b/SOURCES/0317-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,43 @@ +From 985ef94c63859907339c11b158e4540a5568d638 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Mon, 18 Nov 2019 02:25:25 -0500 +Subject: [PATCH 317/335] Update rfc.sh to rhgs-3.5.1 + +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +--- + README | 9 +++++++++ + rfc.sh | 2 +- + 2 files changed, 10 insertions(+), 1 deletion(-) + create mode 100644 README + +diff --git a/README b/README +new file mode 100644 +index 0000000..44a118b +--- /dev/null ++++ b/README +@@ -0,0 +1,9 @@ ++ ++'master' branch is just dummy branch in downstream. Any reference to 'upstream' ++will point to http://git.gluster.org. ++ ++You can checkout the release specific branch by running below command ++ bash$ git checkout -t -b rhs-x.y origin/rhs-x.y ++ ++Happy Hacking!! 
++ +diff --git a/rfc.sh b/rfc.sh +index 94c92ef..69ddd2b 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.0"; ++branch="rhgs-3.5.1"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch b/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch new file mode 100644 index 0000000..e65ae38 --- /dev/null +++ b/SOURCES/0318-Update-rfc.sh-to-rhgs-3.5.1.patch @@ -0,0 +1,114 @@ +From 1f03327887645be2500cd29f69f7a77a4f5d0164 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Mon, 18 Nov 2019 14:25:12 -0500 +Subject: [PATCH 318/335] Update rfc.sh to rhgs-3.5.1 + +Removed the checks for updates and fixes from rfc.sh + +Label: DOWNSTREAM ONLY + +Change-Id: I436c959aa3b3366cd313b29f41c2466c4072efd7 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +--- + rfc.sh | 47 ++++++++--------------------------------------- + 1 file changed, 8 insertions(+), 39 deletions(-) + +diff --git a/rfc.sh b/rfc.sh +index 69ddd2b..918fb11 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -129,13 +129,8 @@ editor_mode() + + if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then + # see note above function warn_reference_missing for regex elaboration +- # Lets first check for github issues +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?#[[:digit:]]+" | awk -F '#' '{print $2}'); +- if [ "x${ref}" = "x" ]; then +- # if not found, check for bugs +- ref=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+bz#[[:digit:]]+" | awk -F '#' '{print $2}'); +- fi + ++ ref=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+") + if [ "x${ref}" != "x" ]; then + return; + fi +@@ -157,16 +152,6 @@ editor_mode() + bz_string="" + fi + +- echo "Select yes '(y)' if this patch fixes the bug/feature completely," +- echo -n "or is the last of the patchset which brings feature (Y/n): " +- read fixes +- fixes_string="fixes" +- if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then +- fixes_string="updates" +- fi +- +- sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: ${bz_string}#${bug}/;}" $1 > $1.new && \ +- mv $1.new $1; + return; + done + fi +@@ -234,8 +219,8 @@ check_patches_for_coding_style() + # IOW, the above helps us find the pattern with leading or training spaces + # or non word consituents like , or ; + # +-# [fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS]) +-# Finds 'fixes' OR 'updates' in any case combination ++# [bB][uU][gG] ++# Finds 'bug' in any case + # + # (:)? + # Followed by an optional : (colon) +@@ -256,28 +241,11 @@ warn_reference_missing() + echo "" + echo "=== Missing a reference in commit! ===" + echo "" +- echo "Gluster commits are made with a reference to a bug or a github issue" +- echo "" +- echo "Submissions that are enhancements (IOW, not functional" +- echo "bug fixes, but improvements of any nature to the code) are tracked" +- echo "using github issues [1]." ++ echo "You must give BUG: <bugid>" + echo "" +- echo "Submissions that are bug fixes are tracked using Bugzilla [2]." 
++ echo "for example:" + echo "" +- echo "A check on the commit message, reveals that there is no bug or" +- echo "github issue referenced in the commit message" +- echo "" +- echo "[1] https://github.com/gluster/glusterfs/issues/new" +- echo "[2] https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS" +- echo "" +- echo "Please file an issue or a bug report and reference the same in the" +- echo "commit message using the following tags:" +- echo "GitHub Issues:" +- echo "\"Fixes: gluster/glusterfs#n\" OR \"Updates: gluster/glusterfs#n\"," +- echo "\"Fixes: #n\" OR \"Updates: #n\"," +- echo "Bugzilla ID:" +- echo "\"Fixes: bz#n\" OR \"Updates: bz#n\"," +- echo "where n is the issue or bug number" ++ echo "BUG: 1234567" + echo "" + echo "You may abort the submission choosing 'N' below and use" + echo "'git commit --amend' to add the issue reference before posting" +@@ -312,7 +280,7 @@ main() + assert_diverge; + + # see note above function warn_reference_missing for regex elaboration +- reference=$(git log -n1 --format='%b' | grep -ow -E "([fF][iI][xX][eE][sS]|[uU][pP][dD][aA][tT][eE][sS])(:)?[[:space:]]+(gluster\/glusterfs)?(bz)?#[[:digit:]]+" | awk -F '#' '{print $2}'); ++ reference=$(git log -n1 --format='%b' | grep -ow -E "^[bB][uU][gG](:)[[:space:]]+[[:digit:]]+" | awk '{print $2}') + + # If this is a commit against master and does not have a bug ID or a github + # issue reference. Warn the contributor that one of the 2 is required +@@ -320,6 +288,7 @@ main() + warn_reference_missing; + fi + ++ + # TODO: add clang-format command here. It will after the changes are done everywhere else + clang_format=$(clang-format --version) + if [ ! -z "${clang_format}" ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch new file mode 100644 index 0000000..d37efaf --- /dev/null +++ b/SOURCES/0319-features-snapview-server-obtain-the-list-of-snapshot.patch @@ -0,0 +1,48 @@ +From 659bd2a0fde9ba0cb8fc3905bcdb63d91e3dfa9d Mon Sep 17 00:00:00 2001 +From: Raghavendra Bhat <raghavendra@redhat.com> +Date: Tue, 2 Jul 2019 16:50:23 -0400 +Subject: [PATCH 319/335] features/snapview-server: obtain the list of + snapshots inside the lock + +The current list of snapshots from priv->dirents is obtained outside +the lock. 
+ +Upstream patch: +> Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +> Fixes: bz#1726783 +> Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com> +> patch: https://review.gluster.org/#/c/glusterfs/+/22990/ + +BUG: 1731513 +Change-Id: I8876ec0a38308da5db058397382fbc82cc7ac177 +Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185838 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/snapview-server/src/snapview-server-mgmt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c +index bc415ef..3d64383 100644 +--- a/xlators/features/snapview-server/src/snapview-server-mgmt.c ++++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c +@@ -256,7 +256,6 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + this = frame->this; + ctx = frame->this->ctx; + priv = this->private; +- old_dirents = priv->dirents; + + if (!ctx) { + errno = EINVAL; +@@ -388,6 +387,7 @@ mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count, + LOCK(&priv->snaplist_lock); + { + oldcount = priv->num_snaps; ++ old_dirents = priv->dirents; + for (i = 0; i < priv->num_snaps; i++) { + for (j = 0; j < snapcount; j++) { + if ((!strcmp(old_dirents[i].name, dirents[j].name)) && +-- +1.8.3.1 + diff --git a/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch new file mode 100644 index 0000000..48a9cad --- /dev/null +++ b/SOURCES/0320-gf-event-Handle-unix-volfile-servers.patch @@ -0,0 +1,58 @@ +From 7e5d8dcb4f557eaca259e8d81cf34d651907396c Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Thu, 24 Oct 2019 12:24:35 +0530 +Subject: [PATCH 320/335] gf-event: Handle unix volfile-servers + +Problem: +glfsheal program uses unix-socket-based volfile server. +volfile server will be the path to socket in this case. +gf_event expects this to be hostname in all cases. So getaddrinfo +will fail on the unix-socket path, events won't be sent in this case. + +Fix: +In case of unix sockets, default to localhost + +upstream-patch: https://review.gluster.org/c/glusterfs/+/23606 +BUG: 1758923 +Change-Id: I60d27608792c29d83fb82beb5fde5ef4754bece8 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185851 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/events.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 9d33783..4e2f8f9 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -43,6 +43,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo *result = NULL; + xlator_t *this = THIS; + int sin_family = AF_INET; ++ char *volfile_server_transport = NULL; + + /* Global context */ + ctx = THIS->ctx; +@@ -62,8 +63,16 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + ++ if (ctx) { ++ volfile_server_transport = ctx->cmd_args.volfile_server_transport; ++ } ++ ++ if (!volfile_server_transport) { ++ volfile_server_transport = "tcp"; ++ } + /* Get Host name to send message */ +- if (ctx && ctx->cmd_args.volfile_server) { ++ if (ctx && ctx->cmd_args.volfile_server && ++ (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ + if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, +-- +1.8.3.1 + diff --git a/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch new file mode 100644 index 0000000..8dec96f --- /dev/null +++ b/SOURCES/0321-Adding-white-spaces-to-description-of-set-group.patch @@ -0,0 +1,55 @@ +From 5e7a2ad35a174d6d0ee5ed58a3e27955e85aa47c Mon Sep 17 00:00:00 2001 +From: kshithijiyer <kshithij.ki@gmail.com> +Date: Mon, 24 Jun 2019 20:08:48 +0530 +Subject: [PATCH 321/335] Adding white spaces to description of set group. + +The description of set group is missing spaces which +leads to the description look like: +volume set <VOLNAME> group <GROUP> - This option can be used for +setting multiple pre-defined volume optionswhere group_name is a +file under /var/lib/glusterd/groups containing onekey, value pair +per line + +Instead of: +volume set <VOLNAME> group <GROUP> - This option can be used for +setting multiple pre-defined volume options where group_name is a +file under /var/lib/glusterd/groups containing one key value +pair per line + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22934/ +> Fixes: bz#1723455 +> Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> + +BUG: 1724021 +Change-Id: I4957988c0c1f35f043db3f64089c049193e60e8f +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185756 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-volume.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6b958bd..66beb1b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3393,10 +3393,10 @@ struct cli_cmd volume_cmds[] = { + {"volume set <VOLNAME> <KEY> <VALUE>", cli_cmd_volume_set_cbk, + "set options for volume <VOLNAME>"}, + +- {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk, +- "This option can be used for setting multiple pre-defined volume options" +- "where group_name is a file under /var/lib/glusterd/groups containing one" +- "key, value pair per line"}, ++ {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk, ++ "This option can be used for setting multiple pre-defined volume options " ++ "where group_name is a file under /var/lib/glusterd/groups containing one " ++ "key value pair per line"}, + + {"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick"}, +-- +1.8.3.1 + diff --git a/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch new file mode 100644 index 0000000..35a234b --- /dev/null +++ b/SOURCES/0322-glusterd-display-correct-rebalance-data-size-after-g.patch @@ -0,0 +1,65 @@ +From 
9be255f76c78fcbbda1e3a72eb2e99d3aface53e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Wed, 16 Oct 2019 23:26:03 +0530 +Subject: [PATCH 322/335] glusterd: display correct rebalance data size after + glusterd restart + +Problem: After completion of rebalance, if glusterd is restarted, +rebalance status displays wrong rebalance data size in its output. + +Cause: While glusterd restoring the information from /var/lib/glusterd/ +into its memory, glusterd fetches rebalance_data from +/var/lib/glusterd/vols/volname/node_state.info. This value is +converted into an integer using atoi(), which is returning +incorrect value for larger values. + +Solution: use sscanf() instead of atoi() to convert string to +integer(in this case it is unsigned long) + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23560/ +> fixes: bz#1762438 +> Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1761486 +Change-Id: Icbdb096919612b4a1d6fb0e315f09d38900abf4e +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185752 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-store.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index 8a10eb8..b3b5ee9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -2974,19 +2974,19 @@ glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo) + volinfo->rebal.op = atoi(value); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) { +- volinfo->rebal.rebalance_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) { +- volinfo->rebal.rebalance_data = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) { +- volinfo->rebal.lookedup_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) { +- volinfo->rebal.rebalance_failures = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) { +- volinfo->rebal.skipped_files = atoi(value); ++ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files); + } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, + SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) { + volinfo->rebal.rebalance_time = atoi(value); +-- +1.8.3.1 + diff --git a/SOURCES/0323-cli-display-detailed-rebalance-info.patch b/SOURCES/0323-cli-display-detailed-rebalance-info.patch new file mode 100644 index 0000000..a00faf8 --- /dev/null +++ b/SOURCES/0323-cli-display-detailed-rebalance-info.patch @@ -0,0 +1,101 @@ +From 852c475040a599ed35798dbb388c6b59c1d0a820 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 22 Oct 2019 15:06:29 +0530 +Subject: [PATCH 323/335] cli: display detailed rebalance 
info + +Problem: When one of the node is down in cluster, +rebalance status is not displaying detailed +information. + +Cause: In glusterd_volume_rebalance_use_rsp_dict() +we are aggregating rsp from all the nodes into a +dictionary and sending it to cli for printing. While +assigning a index to keys we are considering all the +peers instead of considering only the peers which are +up. Because of which, index is not reaching till 1. +while parsing the rsp cli unable to find status-1 +key in dictionary and going out without printing +any information. + +Solution: The simplest fix for this without much +code change is to continue to look for other keys +when status-1 key is not found. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23588 +> fixes: bz#1764119 +> Change-Id: I0062839933c9706119eb85416256eade97e976dc +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1761326 +Change-Id: I0062839933c9706119eb85416256eade97e976dc +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185749 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-rpc-ops.c | 21 ++++++++++++++------- + tests/bugs/glusterd/rebalance-in-cluster.t | 9 +++++++++ + 2 files changed, 23 insertions(+), 7 deletions(-) + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index b167e26..4e91265 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -1597,13 +1597,20 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type, + goto out; + } + +- snprintf(key, sizeof(key), "status-1"); +- +- ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); +- if (ret) { +- gf_log("cli", GF_LOG_TRACE, "count %d %d", count, 1); +- gf_log("cli", GF_LOG_TRACE, "failed to get status"); +- goto out; ++ for (i = 1; i <= count; i++) { ++ snprintf(key, sizeof(key), "status-%d", i); ++ ret = dict_get_int32(dict, key, (int32_t *)&status_rcd); ++ /* If information from a node is missing we should skip ++ * the node and try to fetch information of other nodes. ++ * If information is not found for all nodes, we should ++ * error out. ++ */ ++ if (!ret) ++ break; ++ if (ret && i == count) { ++ gf_log("cli", GF_LOG_TRACE, "failed to get status"); ++ goto out; ++ } + } + + /* Fix layout will be sent to all nodes for the volume +diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t +index 9565fae..469ec6c 100644 +--- a/tests/bugs/glusterd/rebalance-in-cluster.t ++++ b/tests/bugs/glusterd/rebalance-in-cluster.t +@@ -4,6 +4,10 @@ + . $(dirname $0)/../../cluster.rc + . 
$(dirname $0)/../../volume.rc + ++function rebalance_status_field_1 { ++ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p ++} ++ + cleanup; + TEST launch_cluster 2; + TEST $CLI_1 peer probe $H2; +@@ -29,6 +33,11 @@ TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 + TEST $CLI_1 volume rebalance $V0 start + EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0 + ++#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm ++TEST kill_glusterd 2 ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0 ++ ++TEST start_glusterd 2 + #bug-1245142 + + $CLI_1 volume rebalance $V0 start & +-- +1.8.3.1 + diff --git a/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch new file mode 100644 index 0000000..26e1577 --- /dev/null +++ b/SOURCES/0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch @@ -0,0 +1,128 @@ +From dcf3f74fa7e812dfe89667bd6219f70a8457f755 Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Thu, 6 Jun 2019 18:33:19 +0530 +Subject: [PATCH 324/335] extras/hooks: Add SELinux label on new bricks during + add-brick + +Backport of https://review.gluster.org/c/glusterfs/+/22834 + +Change-Id: Ifd8ae5eeb91b968cc1a9a9b5d15844c5233d56db +BUG: 1686800 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185855 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../add-brick/post/S10selinux-label-brick.sh | 100 +++++++++++++++++++++ + 1 file changed, 100 insertions(+) + create mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +new file mode 100755 +index 0000000..4a17c99 +--- /dev/null ++++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +@@ -0,0 +1,100 @@ ++#!/bin/bash ++# ++# Install to hooks/<HOOKS_VER>/add-brick/post ++# ++# Add an SELinux file context for each brick using the glusterd_brick_t type. ++# This ensures that the brick is relabeled correctly on an SELinux restart or ++# restore. Subsequently, run a restore on the brick path to set the selinux ++# labels. ++# ++### ++ ++PROGNAME="Sselinux" ++OPTSPEC="volname:,version:,gd-workdir:,volume-op:" ++VOL= ++ ++parse_args () { ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ --gd-workdir) ++ shift ++ GLUSTERD_WORKDIR=$1 ++ ;; ++ --version) ++ shift ++ ;; ++ --volume-op) ++ shift ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac ++ shift ++ done ++} ++ ++set_brick_labels() ++{ ++ local volname="${1}" ++ local fctx ++ local list=() ++ ++ fctx="$(semanage fcontext --list -C)" ++ ++ # wait for new brick path to be updated under ++ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ ++ sleep 5 ++ ++ # grab the path for each local brick ++ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ # create a list of bricks for which custom SELinux ++ # label doesn't exist ++ for b in ${brickdirs}; do ++ pattern="${b}(/.*)?" ++ echo "${fctx}" | grep "^${pattern}\s" >/dev/null ++ if [[ $? 
-ne 0 ]]; then ++ list+=("${pattern}") ++ fi ++ done ++ ++ # Add a file context for each brick path in the list and associate with the ++ # glusterd_brick_t SELinux type. ++ for p in ${list[@]} ++ do ++ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" ++ done ++ ++ # Set the labels for which SELinux label was added above ++ for b in ${brickdirs} ++ do ++ echo "${list[@]}" | grep "${b}" >/dev/null ++ if [[ $? -eq 0 ]]; then ++ restorecon -R "${b}" ++ fi ++ done ++} ++ ++SELINUX_STATE=$(which getenforce && getenforce) ++[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 ++ ++parse_args "$@" ++[ -z "${VOL}" ] && exit 1 ++ ++set_brick_labels "${VOL}" ++ ++exit 0 +-- +1.8.3.1 + diff --git a/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch new file mode 100644 index 0000000..8e5a5fa --- /dev/null +++ b/SOURCES/0325-extras-hooks-Install-and-package-newly-added-post-ad.patch @@ -0,0 +1,52 @@ +From 27d69d8927a946562aef08a6edfee38b9998f96d Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Wed, 12 Jun 2019 15:41:27 +0530 +Subject: [PATCH 325/335] extras/hooks: Install and package newly added post + add-brick hook script + +Previously a new SELinux hook script was added as a post add-brick +operation to label new brick paths. But the change failed to install +and package new script. Therefore making necessary changes to Makefile +and spec file to get it installed and packaged. + +Backport of https://review.gluster.org/c/glusterfs/+/22856 + +Change-Id: I67b8f4982c2783c34a4bc749fb4387c19a038225 +BUG: 1686800 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185856 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/hook-scripts/add-brick/post/Makefile.am | 4 ++-- + glusterfs.spec.in | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am +index bfc0c1c..9b236df 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh + endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index eeadb65..91180db 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,6 +1447,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh +-- +1.8.3.1 + diff --git 
a/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch new file mode 100644 index 0000000..b0afcc7 --- /dev/null +++ b/SOURCES/0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch @@ -0,0 +1,51 @@ +From a4f01ad90a0c0dfd0655da509c5ed2a11a507cc3 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Mon, 17 Jun 2019 11:10:42 +0530 +Subject: [PATCH 326/335] tests: subdir-mount.t is failing for brick_mux + regrssion + +To avoid the failure wait to run hook script S13create-subdir-mounts.sh +after executed add-brick command by test case. + +This is required as a dependency for the bz referenced below. + +Backport of https://review.gluster.org/c/glusterfs/+/22877 + +Change-Id: I063b6d0f86a550ed0a0527255e4dfbe8f0a8c02e +BUG: 1686800 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185857 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/features/subdir-mount.t | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index 8401946..a02bd6b 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,12 +85,17 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + ++initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Give time for client process to get notified and use the new +-# volfile after add-brick +-sleep 1 ++# Wait to execute create-subdir-mounts.sh script by glusterd ++newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` ++while [ $newcnt -eq $initcnt ] ++do ++ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` ++ sleep 1 ++done + + # Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch new file mode 100644 index 0000000..e3e42fa --- /dev/null +++ b/SOURCES/0327-glusterfind-integrate-with-gfid2path.patch @@ -0,0 +1,93 @@ +From f89242132dc4756c827113154cc6ad18ad6bde88 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Tue, 19 Feb 2019 12:49:12 +0530 +Subject: [PATCH 327/335] glusterfind: integrate with gfid2path + +Integration with gfid2path helps avoid file-system crawl and saves +precious time. Extended attributes starting with "trusted.gfid2path." +are read and the <PGFID>/<BN> values are extracted and the <PGFID> is +iteratively resolved from the brick backend to arrive at the full path. 
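For illustration, the on-brick layout this integration relies on can be inspected by hand. A minimal sketch, assuming a hypothetical brick path and made-up GFIDs; the `getfattr`/`readlink` calls below only mirror what the Python code does through `xattr.list()`/`xattr.getxattr()` and `symlink_gfid_to_path()`:

```
# Hypothetical brick and GFID, for illustration only.
BRICK=/bricks/brick0
GFID=7e0f2f34-8a3b-4e6f-9c1d-2b5a6d7e8f90
GPATH=$BRICK/.glusterfs/${GFID:0:2}/${GFID:2:2}/$GFID

# Every trusted.gfid2path.* xattr value has the form "<PGFID>/<basename>".
getfattr -d -m 'trusted\.gfid2path\.' -e text "$GPATH"

# A parent (directory) GFID is kept as a symlink under .glusterfs, so a
# <PGFID> resolves one directory level per readlink; repeating this until
# the root GFID is reached yields the full path without a crawl.
PGFID=$(getfattr --only-values -m 'trusted\.gfid2path\.' "$GPATH" | cut -d/ -f1)
readlink "$BRICK/.glusterfs/${PGFID:0:2}/${PGFID:2:2}/$PGFID"
```

Since one such xattr is recorded per hardlink, enumerating them recovers every path of a changed GFID, which is what lets glusterfind skip the filesystem crawl.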
+ +>Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +>fixes: #529 +>Signed-off-by: Milind Changire <mchangir@redhat.com> +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22225/ +BUG: 1599802 +Change-Id: I593b02880e3413b77bfceed4a36b00d401f03bc0 +Signed-off-by: Milind Changire <mchangir@redhat.com> +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185706 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tools/glusterfind/src/changelog.py | 45 ++++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index ef982db..d8f97e0 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -114,6 +114,43 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + continue + + ++def enum_hard_links_using_gfid2path(brick, gfid, args): ++ hardlinks = [] ++ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid) ++ if not os.path.isdir(p): ++ # we have a symlink or a normal file ++ try: ++ file_xattrs = xattr.list(p) ++ for x in file_xattrs: ++ if x.startswith("trusted.gfid2path."): ++ # get the value for the xattr i.e. <PGFID>/<BN> ++ v = xattr.getxattr(p, x) ++ pgfid, bn = v.split(os.sep) ++ try: ++ path = symlink_gfid_to_path(brick, pgfid) ++ fullpath = os.path.join(path, bn) ++ fullpath = output_path_prepare(fullpath, args) ++ hardlinks.append(fullpath) ++ except (IOError, OSError) as e: ++ logger.warn("Error converting to path: %s" % e) ++ continue ++ except (IOError, OSError): ++ pass ++ return hardlinks ++ ++ ++def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args): ++ path = "" ++ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}): ++ gfid = row[3].strip() ++ logger.debug("Processing gfid %s" % gfid) ++ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args) ++ ++ path = ",".join(hardlinks) ++ ++ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid}) ++ ++ + def gfid_to_path_using_pgfid(brick, changelog_data, args): + """ + For all the pgfids collected, Converts to Path and +@@ -314,11 +351,11 @@ def get_changes(brick, hash_dir, log_file, start, end, args): + changelog_data.commit() + logger.info("[2/4] Finished 'pgfid to path' conversions.") + +- # Convert all GFIDs for which no other additional details available +- logger.info("[3/4] Starting 'gfid to path using pgfid' conversions ...") +- gfid_to_path_using_pgfid(brick, changelog_data, args) ++ # Convert all gfids recorded for data and metadata to all hardlink paths ++ logger.info("[3/4] Starting 'gfid2path' conversions ...") ++ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args) + changelog_data.commit() +- logger.info("[3/4] Finished 'gfid to path using pgfid' conversions.") ++ logger.info("[3/4] Finished 'gfid2path' conversions.") + + # If some GFIDs fail to get converted from previous step, + # convert using find +-- +1.8.3.1 + diff --git a/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch new file mode 100644 index 0000000..0d12daa --- /dev/null +++ b/SOURCES/0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch @@ -0,0 +1,55 @@ +From a8d8fc91af226fbf49e9dd1d7d91ad287707c4fe Mon Sep 17 00:00:00 2001 +From: Vishal Pandey <vpandey@redhat.com> +Date: 
Wed, 7 Aug 2019 12:53:06 +0530 +Subject: [PATCH 328/335] glusterd: Add warning and abort in case of failures + in migration during remove-brick commit + +Problem - +Currently remove-brick commit goes through even though there were files +that failed to migrate or were skipped. There is no warning raised to the user. +Solution- +Add a check in the remove brick staging phase to verify if the status of the +rebalnce process is complete but there has been failures or some skipped files +while migration, In this case user will be given a warning and remove-brick +commit. User will need to use the force option to remove the bricks. + +> Upstream Path Link: https://review.gluster.org/#/c/glusterfs/+/23171/ +> Fixes: bz#1514683 +> Signed-offby- Vishal Pandey <vpandey@redhat.com> +> Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 + +BUG: 1344758 +Change-Id: I014d0f0afb4b2fac35ab0de52227f98dbae079d5 +Signed-off-by: Vishal Pandey <vpandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185831 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index ad9a572..c5141de 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2191,6 +2191,17 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + goto out; + } + ++ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) { ++ if (volinfo->rebal.rebalance_failures > 0 || ++ volinfo->rebal.skipped_files > 0) { ++ errstr = gf_strdup( ++ "use 'force' option as migration " ++ "of some files might have been skipped or " ++ "has failed"); ++ goto out; ++ } ++ } ++ + ret = glusterd_remove_brick_validate_bricks( + cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE); + if (ret) +-- +1.8.3.1 + diff --git a/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch new file mode 100644 index 0000000..935824d --- /dev/null +++ b/SOURCES/0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch @@ -0,0 +1,165 @@ +From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Wed, 20 Nov 2019 12:26:11 +0530 +Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no + healed_sinks + +Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/ + +Problem: +In a situation where B1 blames B2, B2 blames B1 and B3 doesn't blame +anything for entry heal, heal will not complete even though we have +clear source and sinks. This will happen because while doing +afr_selfheal_find_direction() only the bricks which are blamed by +non-accused bricks are considered as sinks. Later in +__afr_selfheal_entry_finalize_source() when it tries to mark all the +non-sources as sinks it fails to do so because there won't be any +healed_sinks marked, no witness present and there will be a source. + +Fix: +If there is a source and no healed_sinks, then reset all the locked +sources to 0 and healed sinks to 1 to do conservative merge. 
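As a rough illustration of the accused/blamed state involved (the volume layout and brick paths below are hypothetical; these are the same pending xattrs the test case seeds with `setfattr`):

```
# Hypothetical replica-3 volume with bricks at /bricks/b{0,1,2}.
for b in /bricks/b0 /bricks/b1 /bricks/b2; do
    echo "== $b =="
    # trusted.afr.<volname>-client-<N> on a directory carries the pending
    # data/metadata/entry counters this brick holds against brick N; a
    # non-zero entry counter means "I blame brick N".
    getfattr -d -m 'trusted\.afr\.' -e hex "$b/dir"
done
# With b0 blaming b2 and b2 blaming b0 while b1 blames nobody, source
# selection succeeds but no brick gets marked as a healed sink; the fix
# detects that combination and falls back to a conservative merge.
```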
+ +Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +BUG: 1764095 +Reviewed-on: https://code.engineering.redhat.com/gerrit/185834 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++ + 2 files changed, 104 insertions(+) + create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t + +diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +new file mode 100644 +index 0000000..9627908 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -0,0 +1,89 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! -z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 35b600f..3ce882e 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + afr_private_t *priv = NULL; + int source = -1; + int sources_count = 0; ++ int i = 0; + + priv = this->private; + +@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, + } + + source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION); ++ ++ /*If the selected source does not blame any other brick, then mark ++ * everything as sink to trigger conservative merge. ++ */ ++ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) { ++ for (i = 0; i < priv->child_count; i++) { ++ if (locked_on[i]) { ++ sources[i] = 0; ++ healed_sinks[i] = 1; ++ } ++ } ++ return -1; ++ } ++ + return source; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0330-mount.glusterfs-change-the-error-message.patch b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch new file mode 100644 index 0000000..b64f0c6 --- /dev/null +++ b/SOURCES/0330-mount.glusterfs-change-the-error-message.patch @@ -0,0 +1,59 @@ +From 72168245761592a2cd0ebec05dd9bd9bc00745ca Mon Sep 17 00:00:00 2001 +From: Amar Tumballi <amarts@redhat.com> +Date: Wed, 13 Mar 2019 08:51:31 +0530 +Subject: [PATCH 330/335] mount.glusterfs: change the error message + +In scenarios where a mount fails before creating log file, doesn't +make sense to give message to 'check log file'. See below: + +``` +ERROR: failed to create logfile "/var/log/glusterfs/mnt.log" (No space left on device) +ERROR: failed to open logfile /var/log/glusterfs/mnt.log +Mount failed. Please check the log file for more details. 
+``` + +>upstream patch: https://review.gluster.org/#/c/glusterfs/+/22346/ +>Fixes: bz#1688068 +>Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +>Signed-off-by: Amar Tumballi <amarts@redhat.com> + +BUG: 1685406 +Change-Id: I1d837caa4f9bc9f1a37780783e95007e01ae4e3f +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185828 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/utils/mount.glusterfs.in | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index 3f5d76d..cbde42d 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -361,7 +361,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line $mount_point"); + $cmd_line; + if [ $? -ne 0 ]; then +- warn "Mount failed. Please check the log file for more details." ++ # If this is true, then glusterfs process returned error without ++ # getting daemonized. We have made sure the logs are posted to ++ # 'stderr', so no need to point them to logfile. ++ warn "Mounting glusterfs on $mount_point failed." + exit 1; + fi + +@@ -369,7 +372,9 @@ start_glusterfs () + inode=$( ${getinode} $mount_point 2>/dev/null); + # this is required if the stat returns error + if [ $? -ne 0 ]; then +- warn "Mount failed. Please check the log file for more details." ++ # At this time, glusterfs got daemonized, and then later exited. ++ # These failures are only logged in log file. ++ warn "Mount failed. Check the log file ${log_file} for more details." + umount $mount_point > /dev/null 2>&1; + exit 1; + fi +-- +1.8.3.1 + diff --git a/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch new file mode 100644 index 0000000..6eb15b0 --- /dev/null +++ b/SOURCES/0331-features-locks-Do-special-handling-for-op-version-3..patch @@ -0,0 +1,44 @@ +From 147cff762b307bf60519bae4cdefc62f655119a7 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 30 Oct 2019 10:47:17 +0530 +Subject: [PATCH 331/335] features/locks: Do special handling for op-version < + 3.12.0 + +Problem: +Patch https://code.engineering.redhat.com/gerrit/#/c/140080/ diverges from +its upstream patch(https://review.gluster.org/c/glusterfs/+/20031) in op-version. +On upstream special-handling happens for version < 3.10.0 whereas for downstream +special-handling happens for version < 3.12.0. + When rebase happened for 3.5.0 from upstream, this downstream specific change +is missed as there was no special downstream-only patch tracking this difference. 
+This leads to I/O errors on upgrade from 3.3.1->3.5.0 + +Fix: +Do special handling for op-version < 3.12.0 as in 3.4.x + +Change-Id: I72fec058bdfb3cd30d017d205c90aa61aec86c5d +Label: DOWNSTREAM ONLY +BUG: 1766640 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185835 +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + xlators/features/locks/src/posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 9db5ac6..4592240 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -57,7 +57,7 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + do { \ + pl_local_t *__local = NULL; \ + if (frame->root->client && \ +- (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \ ++ (frame->root->client->opversion < GD_OP_VERSION_3_12_0)) { \ + __local = frame->local; \ + PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \ + } else { \ +-- +1.8.3.1 + diff --git a/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch new file mode 100644 index 0000000..c9b2b56 --- /dev/null +++ b/SOURCES/0332-Removing-one-top-command-from-gluster-v-help.patch @@ -0,0 +1,57 @@ +From 808f311bd4f38f06b8afc49fc8d2c65fc4797431 Mon Sep 17 00:00:00 2001 +From: kshithijiyer <kshithij.ki@gmail.com> +Date: Fri, 28 Jun 2019 15:32:31 +0530 +Subject: [PATCH 332/335] Removing one top command from gluster v help + +The current help show 2 different top commands +intead of one single top command which can be +easily observed when "# gluster v help" command +is issued. Removing one "volume top <VOLNAME>" +and clubbing into them into a single command. 
+ +Current help: +volume top <VOLNAME> {open|read|write|opendir|readdir|clear} +[nfs|brick <brick>] [list-cnt <value>] | +volume top <VOLNAME> {read-perf|write-perf} +[bs <size> count <count>] [brick <brick>] +[list-cnt <value>] - volume top operations + +Expected help: +volume top <VOLNAME> {open|read|write|opendir|readdir|clear} +[nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} +[bs <size> count <count>] [brick <brick>] [list-cnt <value>] +- volume top operations + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/22972/ +> fixes: bz#1725034 +> Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> + +BUG: 1726058 +Change-Id: Ifbc4c95f2558286e27dfc5e9667046b80eb1715d +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185757 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-volume.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 66beb1b..754d333 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3427,8 +3427,8 @@ struct cli_cmd volume_cmds[] = { + cli_cmd_volume_profile_cbk, "volume profile operations"}, + + {"volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick " +- "<brick>] [list-cnt <value>] |\n" +- "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] " ++ "<brick>] [list-cnt <value>] | " ++ "{read-perf|write-perf} [bs <size> count <count>] " + "[brick <brick>] [list-cnt <value>]", + cli_cmd_volume_top_cbk, "volume top operations"}, + +-- +1.8.3.1 + diff --git a/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch new file mode 100644 index 0000000..b1d94b4 --- /dev/null +++ b/SOURCES/0333-rpc-Synchronize-slot-allocation-code.patch @@ -0,0 +1,195 @@ +From f199094cb61341a47c98a8ed91b293446182b5a9 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Thu, 3 Oct 2019 14:06:52 +0530 +Subject: [PATCH 333/335] rpc: Synchronize slot allocation code + +Problem: Current slot allocation/deallocation code path is not + synchronized.There are scenario when due to race condition + in slot allocation/deallocation code path brick is crashed. 
+ +Solution: Synchronize slot allocation/deallocation code path to + avoid the issue + +> Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +> Fixes: bz#1763036 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23508/) +> (Cherry pick from commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2) + +Change-Id: I4fb659a75234218ffa0e5e0bf9308f669f75fc25 +BUG: 1741193 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185827 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/event-epoll.c | 74 +++++++++++++++++++++++------------------- + 1 file changed, 41 insertions(+), 33 deletions(-) + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 0cec47e..65f5efd 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -69,15 +69,27 @@ __event_newtable(struct event_pool *event_pool, int table_idx) + } + + static int ++event_slot_ref(struct event_slot_epoll *slot) ++{ ++ if (!slot) ++ return -1; ++ ++ return GF_ATOMIC_INC(slot->ref); ++} ++ ++static int + __event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int i = 0; ++ int j = 0; + int table_idx = -1; + int gen = -1; + struct event_slot_epoll *table = NULL; + +- for (i = 0; i < EVENT_EPOLL_TABLES; i++) { ++retry: ++ ++ while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: + continue; +@@ -98,6 +110,7 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + if (table) + /* break out of the loop */ + break; ++ i++; + } + + if (!table) +@@ -105,20 +118,20 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + + table_idx = i; + +- for (i = 0; i < EVENT_EPOLL_SLOTS; i++) { +- if (table[i].fd == -1) { ++ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) { ++ if (table[j].fd == -1) { + /* wipe everything except bump the generation */ +- gen = table[i].gen; +- memset(&table[i], 0, sizeof(table[i])); +- table[i].gen = gen + 1; ++ gen = table[j].gen; ++ memset(&table[j], 0, sizeof(table[j])); ++ table[j].gen = gen + 1; + +- LOCK_INIT(&table[i].lock); +- INIT_LIST_HEAD(&table[i].poller_death); ++ LOCK_INIT(&table[j].lock); ++ INIT_LIST_HEAD(&table[j].poller_death); + +- table[i].fd = fd; ++ table[j].fd = fd; + if (notify_poller_death) { +- table[i].idx = table_idx * EVENT_EPOLL_SLOTS + i; +- list_add_tail(&table[i].poller_death, ++ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j; ++ list_add_tail(&table[j].poller_death, + &event_pool->poller_death); + } + +@@ -128,18 +141,26 @@ __event_slot_alloc(struct event_pool *event_pool, int fd, + } + } + +- return table_idx * EVENT_EPOLL_SLOTS + i; ++ if (j == EVENT_EPOLL_SLOTS) { ++ table = NULL; ++ i++; ++ goto retry; ++ } else { ++ (*slot) = &table[j]; ++ event_slot_ref(*slot); ++ return table_idx * EVENT_EPOLL_SLOTS + j; ++ } + } + + static int + event_slot_alloc(struct event_pool *event_pool, int fd, +- char notify_poller_death) ++ char notify_poller_death, struct event_slot_epoll **slot) + { + int idx = -1; + + pthread_mutex_lock(&event_pool->mutex); + { +- idx = __event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot); + } + pthread_mutex_unlock(&event_pool->mutex); + +@@ -153,6 +174,7 @@ __event_slot_dealloc(struct 
event_pool *event_pool, int idx) + int offset = 0; + struct event_slot_epoll *table = NULL; + struct event_slot_epoll *slot = NULL; ++ int fd = -1; + + table_idx = idx / EVENT_EPOLL_SLOTS; + offset = idx % EVENT_EPOLL_SLOTS; +@@ -164,11 +186,13 @@ __event_slot_dealloc(struct event_pool *event_pool, int idx) + slot = &table[offset]; + slot->gen++; + ++ fd = slot->fd; + slot->fd = -1; + slot->handled_error = 0; + slot->in_handler = 0; + list_del_init(&slot->poller_death); +- event_pool->slots_used[table_idx]--; ++ if (fd != -1) ++ event_pool->slots_used[table_idx]--; + + return; + } +@@ -185,15 +209,6 @@ event_slot_dealloc(struct event_pool *event_pool, int idx) + return; + } + +-static int +-event_slot_ref(struct event_slot_epoll *slot) +-{ +- if (!slot) +- return -1; +- +- return GF_ATOMIC_INC(slot->ref); +-} +- + static struct event_slot_epoll * + event_slot_get(struct event_pool *event_pool, int idx) + { +@@ -379,20 +394,13 @@ event_register_epoll(struct event_pool *event_pool, int fd, + if (destroy == 1) + goto out; + +- idx = event_slot_alloc(event_pool, fd, notify_poller_death); ++ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot); + if (idx == -1) { + gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, + "could not find slot for fd=%d", fd); + return -1; + } + +- slot = event_slot_get(event_pool, idx); +- if (!slot) { +- gf_msg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, +- "could not find slot for fd=%d idx=%d", fd, idx); +- return -1; +- } +- + assert(slot->fd == fd); + + LOCK(&slot->lock); +-- +1.8.3.1 + diff --git a/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch new file mode 100644 index 0000000..48f927f --- /dev/null +++ b/SOURCES/0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch @@ -0,0 +1,54 @@ +From 17940583c4d991a568582581f68dcbf08463ccaf Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Tue, 16 Jul 2019 10:31:46 +0530 +Subject: [PATCH 334/335] dht: log getxattr failure for node-uuid at "DEBUG" + +There are two ways to fetch node-uuid information from dht. + +1 - #define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids" +This key is used by AFR. + +2 - #define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol" +This key is used for non-afr volume type. + +We do two getxattr operations. First on the #1 key followed by on #2 if +getxattr on #1 key fails. + +Since the parent function "dht_init_local_subvols_and_nodeuuids" logs failure, +moving the log-level to DEBUG in dht_find_local_subvol_cbk. 
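For reference, both keys are virtual xattrs answered by the client graph rather than on-disk attributes. A sketch of the two-step lookup as seen from a mount (the mount point is hypothetical, and reachability of the rebalance key from a plain FUSE client is an assumption here):

```
M=/mnt/glustervol   # hypothetical FUSE mount point

# Key #1, served by AFR; succeeds on replicated volumes:
getfattr -n trusted.glusterfs.list-node-uuids --only-values "$M"

# Key #2, the fallback for non-AFR volume types. On a plain distribute
# volume the first getxattr fails with ENODATA (the failure this patch
# demotes to DEBUG) before this one is attempted:
getfattr -n glusterfs.find-local-subvol --only-values "$M"
```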
+ +>fixes: bz#1730175 +>Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +>Signed-off-by: Susant Palai <spalai@redhat.com> +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23053/ + +BUG: 1727755 +Change-Id: I4d88244dc26587b111ca5b00d4c00118efdaac14 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185876 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 37952ba..d0b5287 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4253,8 +4253,11 @@ dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + local->op_ret = -1; + local->op_errno = op_errno; + UNLOCK(&frame->lock); +- gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED, +- "getxattr err for dir"); ++ if (op_errno == ENODATA) ++ gf_msg_debug(this->name, 0, "failed to get node-uuid"); ++ else ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid"); + goto post_unlock; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch new file mode 100644 index 0000000..c3341df --- /dev/null +++ b/SOURCES/0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch @@ -0,0 +1,15991 @@ +From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001 +From: Sunil Kumar Acharya <sheggodu@redhat.com> +Date: Thu, 17 Oct 2019 13:03:56 +0530 +Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS + +- tests/bugs/shard/bug-1272986.t + https://review.gluster.org/#/c/glusterfs/+/23499/ + https://review.gluster.org/#/c/glusterfs/+/23551/ + +- tests/basic/posix/shared-statfs.t + https://review.gluster.org/c/glusterfs/+/23550 + +- tests/basic/fops-sanity.t + https://review.gluster.org/c/glusterfs/+/22210/ + +- tests/bugs/transport/bug-873367.t +- tests/features/ssl-authz.t +- tests/bugs/snapshot/bug-1399598-uss-with-ssl.t + https://review.gluster.org/#/c/glusterfs/+/23587/ + +- remove gnfs relatedtests + +- tests/bugs/shard/unlinks-and-renames.t + https://review.gluster.org/#/c/glusterfs/+/23585/ + +- tests/bugs/rpc/bug-954057.t +- tests/bugs/glusterfs-server/bug-887145.t + https://review.gluster.org/#/c/glusterfs/+/23710/ + +- tests/features/ssl-ciphers.t + https://review.gluster.org/#/c/glusterfs/+/23703/ + +- tests/bugs/fuse/bug-985074.t + https://review.gluster.org/#/c/glusterfs/+/23734/ + +BUG: 1762180 +Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/185716 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tests/basic/fops-sanity.c | 1862 ++-- + tests/basic/posix/shared-statfs.t | 11 +- + tests/bugs/cli/bug-1320388.t | 2 +- + tests/bugs/fuse/bug-985074.t | 4 +- + tests/bugs/glusterd/quorum-value-check.t | 35 - + tests/bugs/glusterfs-server/bug-887145.t | 14 +- + tests/bugs/nfs/bug-1053579.t | 114 - + tests/bugs/nfs/bug-1116503.t | 47 - + tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t | 24 - + tests/bugs/nfs/bug-1157223-symlink-mounting.t | 126 - + tests/bugs/nfs/bug-1161092-nfs-acls.t | 39 - + tests/bugs/nfs/bug-1166862.t | 69 - + tests/bugs/nfs/bug-1210338.c | 31 - + tests/bugs/nfs/bug-1210338.t | 30 - + 
tests/bugs/nfs/bug-1302948.t | 13 - + tests/bugs/nfs/bug-847622.t | 39 - + tests/bugs/nfs/bug-877885.t | 39 - + tests/bugs/nfs/bug-904065.t | 100 - + tests/bugs/nfs/bug-915280.t | 54 - + tests/bugs/nfs/bug-970070.t | 13 - + tests/bugs/nfs/bug-974972.t | 41 - + tests/bugs/nfs/showmount-many-clients.t | 41 - + tests/bugs/nfs/socket-as-fifo.py | 33 - + tests/bugs/nfs/socket-as-fifo.t | 25 - + tests/bugs/nfs/subdir-trailing-slash.t | 32 - + tests/bugs/nfs/zero-atime.t | 33 - + tests/bugs/rpc/bug-954057.t | 10 +- + tests/bugs/shard/bug-1272986.t | 6 +- + tests/bugs/transport/bug-873367.t | 2 +- + tests/features/ssl-authz.t | 2 +- + tests/features/ssl-ciphers.t | 61 +- + tests/ssl.rc | 2 +- + xlators/features/shard/src/shard.c | 11754 ++++++++++---------- + 33 files changed, 6638 insertions(+), 8070 deletions(-) + delete mode 100755 tests/bugs/glusterd/quorum-value-check.t + delete mode 100755 tests/bugs/nfs/bug-1053579.t + delete mode 100644 tests/bugs/nfs/bug-1116503.t + delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t + delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t + delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t + delete mode 100755 tests/bugs/nfs/bug-1166862.t + delete mode 100644 tests/bugs/nfs/bug-1210338.c + delete mode 100644 tests/bugs/nfs/bug-1210338.t + delete mode 100755 tests/bugs/nfs/bug-1302948.t + delete mode 100755 tests/bugs/nfs/bug-847622.t + delete mode 100755 tests/bugs/nfs/bug-877885.t + delete mode 100755 tests/bugs/nfs/bug-904065.t + delete mode 100755 tests/bugs/nfs/bug-915280.t + delete mode 100755 tests/bugs/nfs/bug-970070.t + delete mode 100755 tests/bugs/nfs/bug-974972.t + delete mode 100644 tests/bugs/nfs/showmount-many-clients.t + delete mode 100755 tests/bugs/nfs/socket-as-fifo.py + delete mode 100644 tests/bugs/nfs/socket-as-fifo.t + delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t + delete mode 100755 tests/bugs/nfs/zero-atime.t + +diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c +index aff72d8..171d003 100644 +--- a/tests/basic/fops-sanity.c ++++ b/tests/basic/fops-sanity.c +@@ -17,15 +17,16 @@ + + /* Filesystem basic sanity check, tests all (almost) fops. */ + +-#include <stdio.h> ++#include <dirent.h> ++#include <errno.h> + #include <fcntl.h> +-#include <unistd.h> +-#include <sys/types.h> ++#include <stdio.h> ++#include <string.h> + #include <sys/stat.h> ++#include <sys/sysmacros.h> ++#include <sys/types.h> + #include <sys/xattr.h> +-#include <errno.h> +-#include <string.h> +-#include <dirent.h> ++#include <unistd.h> + + #ifndef linux + #include <sys/socket.h> +@@ -34,904 +35,880 @@ + #endif + + /* for fd based fops after unlink */ +-int +-fd_based_fops_1(char *filename); ++int fd_based_fops_1(char *filename); + /* for fd based fops before unlink */ +-int +-fd_based_fops_2(char *filename); ++int fd_based_fops_2(char *filename); + /* fops based on fd after dup */ +-int +-dup_fd_based_fops(char *filename); ++int dup_fd_based_fops(char *filename); + /* for fops based on path */ +-int +-path_based_fops(char *filename); ++int path_based_fops(char *filename); + /* for fops which operate on directory */ +-int +-dir_based_fops(char *filename); ++int dir_based_fops(char *filename); + /* for fops which operate in link files (symlinks) */ +-int +-link_based_fops(char *filename); ++int link_based_fops(char *filename); + /* to test open syscall with open modes available. 
*/ +-int +-test_open_modes(char *filename); ++int test_open_modes(char *filename); + /* generic function which does open write and read. */ +-int +-generic_open_read_write(char *filename, int flag, mode_t mode); ++int generic_open_read_write(char *filename, int flag, mode_t mode); + + #define OPEN_MODE 0666 + +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int result = 0; +- char filename[255] = { +- 0, +- }; +- +- if (argc > 1) +- strcpy(filename, argv[1]); +- else +- strcpy(filename, "temp-xattr-test-file"); +- +- ret = fd_based_fops_1(strcat(filename, "_1")); +- if (ret < 0) { +- fprintf(stderr, "fd based file operation 1 failed\n"); +- result |= ret; +- } else { +- fprintf(stdout, "fd based file operation 1 passed\n"); +- } +- +- ret = fd_based_fops_2(strcat(filename, "_2")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "fd based file operation 2 failed\n"); +- } else { +- fprintf(stdout, "fd based file operation 2 passed\n"); +- } +- +- ret = dup_fd_based_fops(strcat(filename, "_3")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "dup fd based file operation failed\n"); +- } else { +- fprintf(stdout, "dup fd based file operation passed\n"); +- } +- +- ret = path_based_fops(strcat(filename, "_4")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "path based file operation failed\n"); +- } else { +- fprintf(stdout, "path based file operation passed\n"); +- } +- +- ret = dir_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "directory based file operation failed\n"); +- } else { +- fprintf(stdout, "directory based file operation passed\n"); +- } +- +- ret = link_based_fops(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "link based file operation failed\n"); +- } else { +- fprintf(stdout, "link based file operation passed\n"); +- } +- +- ret = test_open_modes(strcat(filename, "_5")); +- if (ret < 0) { +- result |= ret; +- fprintf(stderr, "testing modes of `open' call failed\n"); +- } else { +- fprintf(stdout, "testing modes of `open' call passed\n"); +- } +- return result; ++int main(int argc, char *argv[]) { ++ int ret = -1; ++ int result = 0; ++ char filename[255] = { ++ 0, ++ }; ++ ++ if (argc > 1) ++ strcpy(filename, argv[1]); ++ else ++ strcpy(filename, "temp-xattr-test-file"); ++ ++ ret = fd_based_fops_1(strcat(filename, "_1")); ++ if (ret < 0) { ++ fprintf(stderr, "fd based file operation 1 failed\n"); ++ result |= ret; ++ } else { ++ fprintf(stdout, "fd based file operation 1 passed\n"); ++ } ++ ++ ret = fd_based_fops_2(strcat(filename, "_2")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "fd based file operation 2 failed\n"); ++ } else { ++ fprintf(stdout, "fd based file operation 2 passed\n"); ++ } ++ ++ ret = dup_fd_based_fops(strcat(filename, "_3")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "dup fd based file operation failed\n"); ++ } else { ++ fprintf(stdout, "dup fd based file operation passed\n"); ++ } ++ ++ ret = path_based_fops(strcat(filename, "_4")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "path based file operation failed\n"); ++ } else { ++ fprintf(stdout, "path based file operation passed\n"); ++ } ++ ++ ret = dir_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "directory based file operation failed\n"); ++ } else { ++ fprintf(stdout, "directory based file operation passed\n"); ++ } ++ ++ ret = link_based_fops(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ 
fprintf(stderr, "link based file operation failed\n"); ++ } else { ++ fprintf(stdout, "link based file operation passed\n"); ++ } ++ ++ ret = test_open_modes(strcat(filename, "_5")); ++ if (ret < 0) { ++ result |= ret; ++ fprintf(stderr, "testing modes of `open' call failed\n"); ++ } else { ++ fprintf(stdout, "testing modes of `open' call passed\n"); ++ } ++ return result; + } + + /* Execute all possible fops on a fd which is unlinked */ +-int +-fd_based_fops_1(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- /* +- * These metadata operations fail at the moment because kernel doesn't +- * pass the client fd in the operation. +- * The following bug tracks this change. 
+- * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 +- * ret = fchmod (fd, 0640); +- * if (ret < 0) { +- * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fchown (fd, 10001, 10001); +- * if (ret < 0) { +- * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); +- * result |= ret; +- * } +- +- * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); +- * if (ret < 0) { +- * fprintf (stderr, "fsetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = flistxattr (fd, NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "flistxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); +- * if (ret <= 0) { +- * fprintf (stderr, "fgetxattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- +- * ret = fremovexattr (fd, "trusted.xattr-test"); +- * if (ret < 0) { +- * fprintf (stderr, "fremovexattr failed : %s\n", strerror +- (errno)); +- * result |= ret; +- * } +- */ +- +- if (fd) +- close(fd); +- return result; ++int fd_based_fops_1(char *filename) { ++ int fd = 0; ++ int ret = -1; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(fd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(fd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(fd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(fd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(fd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ /* ++ * These metadata operations fail at the moment because kernel doesn't ++ * pass the client fd in the operation. ++ * The following bug tracks this change. 
++ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 ++ * ret = fchmod (fd, 0640); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fchown (fd, 10001, 10001); ++ * if (ret < 0) { ++ * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); ++ * if (ret < 0) { ++ * fprintf (stderr, "fsetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = flistxattr (fd, NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "flistxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); ++ * if (ret <= 0) { ++ * fprintf (stderr, "fgetxattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ ++ * ret = fremovexattr (fd, "trusted.xattr-test"); ++ * if (ret < 0) { ++ * fprintf (stderr, "fremovexattr failed : %s\n", strerror ++ (errno)); ++ * result |= ret; ++ * } ++ */ ++ ++ if (fd) ++ close(fd); ++ return result; + } + +-int +-fd_based_fops_2(char *filename) +-{ +- int fd = 0; +- int ret = -1; +- int result = 0; +- struct stat stbuf = { +- 0, +- }; +- char wstr[50] = { +- 0, +- }; +- char rstr[50] = { +- 0, +- }; +- +- fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); +- if (fd < 0) { +- fprintf(stderr, "open failed : %s\n", strerror(errno)); +- return ret; +- } +- +- ret = ftruncate(fd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(wstr, "This is my second string\n"); +- ret = write(fd, wstr, strlen(wstr)); +- if (ret < 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(fd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = fstat(fd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(fd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(fd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(fd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(fd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(fd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(fd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(filename); ++int 
fd_based_fops_2(char *filename) {
++  int fd = 0;
++  int ret = -1;
++  int result = 0;
++  struct stat stbuf = {
++      0,
++  };
++  char wstr[50] = {
++      0,
++  };
++  char rstr[50] = {
++      0,
++  };
++
++  fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
++  if (fd < 0) {
++    fprintf(stderr, "open failed : %s\n", strerror(errno));
++    return ret;
++  }
++
++  ret = ftruncate(fd, 0);
++  if (ret < 0) {
++    fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  strcpy(wstr, "This is my second string\n");
++  ret = write(fd, wstr, strlen(wstr));
++  if (ret < 0) {
++    fprintf(stderr, "write failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = lseek(fd, 0, SEEK_SET);
++  if (ret < 0) {
++    fprintf(stderr, "lseek failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = read(fd, rstr, strlen(wstr));
++  if (ret <= 0) {
++    fprintf(stderr, "read failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = memcmp(rstr, wstr, strlen(wstr));
++  if (ret != 0) {
++    fprintf(stderr, "read returning junk\n");
++    result |= ret;
++  }
++
++  ret = fstat(fd, &stbuf);
++  if (ret < 0) {
++    fprintf(stderr, "fstat failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fchmod(fd, 0640);
++  if (ret < 0) {
++    fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fchown(fd, 10001, 10001);
++  if (ret < 0) {
++    fprintf(stderr, "fchown failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fsync(fd);
++  if (ret < 0) {
++    fprintf(stderr, "fsync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
++  if (ret < 0) {
++    fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fdatasync(fd);
++  if (ret < 0) {
++    fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = flistxattr(fd, NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
++  if (ret <= 0) {
++    fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = fremovexattr(fd, "trusted.xattr-test");
++  if (ret < 0) {
++    fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  if (fd)
++    close(fd);
++  unlink(filename);
+
+-    return result;
++  return result;
+ }
+
+-int
+-path_based_fops(char *filename)
+-{
+-    int ret = -1;
+-    int fd = 0;
+-    int result = 0;
+-    struct stat stbuf = {
+-        0,
+-    };
+-    char newfilename[255] = {
+-        0,
+-    };
+-    char *hardlink = "linkfile-hard.txt";
+-    char *symlnk = "linkfile-soft.txt";
+-    char buf[1024] = {
+-        0,
+-    };
+-
+-    fd = creat(filename, 0644);
+-    if (fd < 0) {
+-        fprintf(stderr, "creat failed: %s\n", strerror(errno));
+-        return ret;
+-    }
+-
+-    ret = truncate(filename, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-
+-    ret = stat(filename, &stbuf);
+-    if (ret < 0) {
+-        fprintf(stderr, "stat failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-
+-    ret = chmod(filename, 0640);
+-    if (ret < 0) {
+-        fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-
+-    ret = chown(filename, 10001, 10001);
+-    if (ret < 0) {
+-        fprintf(stderr, "chown failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-
+-    ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "setxattr
failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(filename, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(filename, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(filename, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = access(filename, R_OK | W_OK); +- if (ret < 0) { +- fprintf(stderr, "access failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = link(filename, hardlink); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(hardlink); +- +- ret = symlink(filename, symlnk); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = readlink(symlnk, buf, sizeof(buf)); +- if (ret < 0) { +- fprintf(stderr, "readlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink(symlnk); +- +- /* Create a character special file */ +- ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); +- if (ret < 0) { +- fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("cspecial"); +- +- ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); +- if (ret < 0) { +- fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- unlink("bspecial"); ++int path_based_fops(char *filename) { ++ int ret = -1; ++ int fd = 0; ++ int result = 0; ++ struct stat stbuf = { ++ 0, ++ }; ++ char newfilename[255] = { ++ 0, ++ }; ++ char *hardlink = "linkfile-hard.txt"; ++ char *symlnk = "linkfile-soft.txt"; ++ char buf[1024] = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ ret = truncate(filename, 0); ++ if (ret < 0) { ++ fprintf(stderr, "truncate failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(filename, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(filename, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(filename, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = listxattr(filename, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(filename, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(filename, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = access(filename, R_OK | W_OK); ++ if (ret < 0) { ++ fprintf(stderr, "access failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = link(filename, hardlink); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ unlink(hardlink); ++ ++ ret 
= symlink(filename, symlnk);
++  if (ret < 0) {
++    fprintf(stderr, "symlink failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++
++  ret = readlink(symlnk, buf, sizeof(buf));
++  if (ret < 0) {
++    fprintf(stderr, "readlink failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink(symlnk);
++
++  /* Create a character special file */
++  ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
++  if (ret < 0) {
++    fprintf(stderr, "cspecial mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink("cspecial");
++
++  ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
++  if (ret < 0) {
++    fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink("bspecial");
+
+ #ifdef linux
+-    ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
++  ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
+ #else
+-    ret = mkfifo("fifo", 0);
++  ret = mkfifo("fifo", 0);
+ #endif
+-    if (ret < 0) {
+-        fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink("fifo");
++  if (ret < 0) {
++    fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink("fifo");
+
+ #ifdef linux
+-    ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
+-    if (ret < 0) {
+-        fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
++  ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
++  if (ret < 0) {
++    fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
++    result |= ret;
++  }
+ #else
+-    {
+-        int s;
+-        const char *pathname = "sock";
+-        struct sockaddr_un addr;
+-
+-        s = socket(PF_LOCAL, SOCK_STREAM, 0);
+-        memset(&addr, 0, sizeof(addr));
+-        strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
+-        ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+-        if (ret < 0) {
+-            fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+-            result |= ret;
+-        }
+-        close(s);
+-    }
+-#endif
+-    unlink("sock");
++  {
++    int s;
++    const char *pathname = "sock";
++    struct sockaddr_un addr;
+
+-    strcpy(newfilename, filename);
+-    strcat(newfilename, "_new");
+-    ret = rename(filename, newfilename);
++    s = socket(PF_LOCAL, SOCK_STREAM, 0);
++    memset(&addr, 0, sizeof(addr));
++    strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
++    ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+    if (ret < 0) {
+-        fprintf(stderr, "rename failed: %s\n", strerror(errno));
+-        result |= ret;
+-    }
+-    unlink(newfilename);
+-
+-    if (fd)
+-        close(fd);
+-
+-    unlink(filename);
+-    return result;
+-}
+-
+-int
+-dup_fd_based_fops(char *filename)
+-{
+-    int fd = 0;
+-    int result = 0;
+-    int newfd = 0;
+-    int ret = -1;
+-    struct stat stbuf = {
+-        0,
+-    };
+-    char wstr[50] = {
+-        0,
+-    };
+-    char rstr[50] = {
+-        0,
+-    };
+-
+-    fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+-    if (fd < 0) {
+-        fprintf(stderr, "open failed : %s\n", strerror(errno));
+-        return ret;
+-    }
+-
+-    newfd = dup(fd);
+-    if (newfd < 0) {
+-        fprintf(stderr, "dup failed: %s\n", strerror(errno));
+-        result |= ret;
++      fprintf(stderr, "sock bind failed: %s\n", strerror(errno));
++      result |= ret;
+    }
+-
++    close(s);
++  }
++#endif
++  unlink("sock");
++
++  strcpy(newfilename, filename);
++  strcat(newfilename, "_new");
++  ret = rename(filename, newfilename);
++  if (ret < 0) {
++    fprintf(stderr, "rename failed: %s\n", strerror(errno));
++    result |= ret;
++  }
++  unlink(newfilename);
++
++  if (fd)
+    close(fd);
+
+-    strcpy(wstr, "This is my string\n");
+-    ret = write(newfd, wstr, strlen(wstr));
+-    if
(ret <= 0) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lseek(newfd, 0, SEEK_SET); +- if (ret < 0) { +- fprintf(stderr, "lseek failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = read(newfd, rstr, strlen(wstr)); +- if (ret <= 0) { +- fprintf(stderr, "read failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = memcmp(rstr, wstr, strlen(wstr)); +- if (ret != 0) { +- fprintf(stderr, "read returning junk\n"); +- result |= ret; +- } +- +- ret = ftruncate(newfd, 0); +- if (ret < 0) { +- fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fstat(newfd, &stbuf); +- if (ret < 0) { +- fprintf(stderr, "fstat failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchmod(newfd, 0640); +- if (ret < 0) { +- fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fchown(newfd, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "fchown failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fsync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fdatasync(newfd); +- if (ret < 0) { +- fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = flistxattr(newfd, NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = fremovexattr(newfd, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (newfd) +- close(newfd); +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed : %s\n", strerror(errno)); +- result |= ret; +- } +- return result; ++ unlink(filename); ++ return result; + } + +-int +-dir_based_fops(char *dirname) +-{ +- int ret = -1; +- int result = 0; +- DIR *dp = NULL; +- char buff[255] = { +- 0, +- }; +- struct dirent *dbuff = { +- 0, +- }; +- struct stat stbuff = { +- 0, +- }; +- char newdname[255] = { +- 0, +- }; +- char *cwd = NULL; +- +- ret = mkdir(dirname, 0755); +- if (ret < 0) { +- fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); +- return ret; +- } +- +- dp = opendir(dirname); +- if (dp == NULL) { +- fprintf(stderr, "opendir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- dbuff = readdir(dp); +- if (NULL == dbuff) { +- fprintf(stderr, "readdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = closedir(dp); +- if (ret < 0) { +- fprintf(stderr, "closedir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = stat(dirname, &stbuff); +- if (ret < 0) { +- fprintf(stderr, "stat failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chmod(dirname, 0744); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = chown(dirname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "chmod failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "setxattr failed: %s\n", 
strerror(errno)); +- result |= ret; +- } +- +- ret = listxattr(dirname, NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); +- if (ret <= 0) { +- ret = -1; +- fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = removexattr(dirname, "trusted.xattr-test"); +- if (ret < 0) { +- fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "/../"); +- ret = chdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "chdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- cwd = getcwd(buff, 255); +- if (NULL == cwd) { +- fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(newdname, dirname); +- strcat(newdname, "new"); +- ret = rename(dirname, newdname); +- if (ret < 0) { +- fprintf(stderr, "rename failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = rmdir(newdname); +- if (ret < 0) { +- fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- rmdir(dirname); +- return result; ++int dup_fd_based_fops(char *filename) { ++ int fd = 0; ++ int result = 0; ++ int newfd = 0; ++ int ret = -1; ++ struct stat stbuf = { ++ 0, ++ }; ++ char wstr[50] = { ++ 0, ++ }; ++ char rstr[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); ++ if (fd < 0) { ++ fprintf(stderr, "open failed : %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ newfd = dup(fd); ++ if (newfd < 0) { ++ fprintf(stderr, "dup failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ close(fd); ++ ++ strcpy(wstr, "This is my string\n"); ++ ret = write(newfd, wstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lseek(newfd, 0, SEEK_SET); ++ if (ret < 0) { ++ fprintf(stderr, "lseek failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = read(newfd, rstr, strlen(wstr)); ++ if (ret <= 0) { ++ fprintf(stderr, "read failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = memcmp(rstr, wstr, strlen(wstr)); ++ if (ret != 0) { ++ fprintf(stderr, "read returning junk\n"); ++ result |= ret; ++ } ++ ++ ret = ftruncate(newfd, 0); ++ if (ret < 0) { ++ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fstat(newfd, &stbuf); ++ if (ret < 0) { ++ fprintf(stderr, "fstat failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchmod(newfd, 0640); ++ if (ret < 0) { ++ fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fchown(newfd, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "fchown failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fsync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fdatasync(newfd); ++ if (ret < 0) { ++ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = flistxattr(newfd, NULL, 0); ++ if (ret <= 0) { ++ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) 
{ ++ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = fremovexattr(newfd, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (newfd) ++ close(newfd); ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed : %s\n", strerror(errno)); ++ result |= ret; ++ } ++ return result; + } + +-int +-link_based_fops(char *filename) +-{ +- int ret = -1; +- int result = 0; +- int fd = 0; +- char newname[255] = { +- 0, +- }; +- char linkname[255] = { +- 0, +- }; +- struct stat lstbuf = { +- 0, +- }; +- +- fd = creat(filename, 0644); +- if (fd < 0) { +- fd = 0; +- fprintf(stderr, "creat failed: %s\n", strerror(errno)); +- return ret; +- } +- +- strcpy(newname, filename); +- strcat(newname, "_hlink"); +- ret = link(filename, newname); +- if (ret < 0) { +- fprintf(stderr, "link failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = unlink(filename); +- if (ret < 0) { +- fprintf(stderr, "unlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- strcpy(linkname, filename); +- strcat(linkname, "_slink"); +- ret = symlink(newname, linkname); +- if (ret < 0) { +- fprintf(stderr, "symlink failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lstat(linkname, &lstbuf); +- if (ret < 0) { +- fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lchown(linkname, 10001, 10001); +- if (ret < 0) { +- fprintf(stderr, "lchown failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); +- if (ret < 0) { +- fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = llistxattr(linkname, NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); +- if (ret < 0) { +- ret = -1; +- fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- ret = lremovexattr(linkname, "trusted.lxattr-test"); +- if (ret < 0) { +- fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); +- result |= ret; +- } +- +- if (fd) +- close(fd); +- unlink(linkname); +- unlink(newname); +- return result; ++int dir_based_fops(char *dirname) { ++ int ret = -1; ++ int result = 0; ++ DIR *dp = NULL; ++ char buff[255] = { ++ 0, ++ }; ++ struct dirent *dbuff = { ++ 0, ++ }; ++ struct stat stbuff = { ++ 0, ++ }; ++ char newdname[255] = { ++ 0, ++ }; ++ char *cwd = NULL; ++ ++ ret = mkdir(dirname, 0755); ++ if (ret < 0) { ++ fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ dp = opendir(dirname); ++ if (dp == NULL) { ++ fprintf(stderr, "opendir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ dbuff = readdir(dp); ++ if (NULL == dbuff) { ++ fprintf(stderr, "readdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = closedir(dp); ++ if (ret < 0) { ++ fprintf(stderr, "closedir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = stat(dirname, &stbuff); ++ if (ret < 0) { ++ fprintf(stderr, "stat failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chmod(dirname, 0744); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = chown(dirname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "chmod failed: %s\n", 
strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = listxattr(dirname, NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); ++ if (ret <= 0) { ++ ret = -1; ++ fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = removexattr(dirname, "trusted.xattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "/../"); ++ ret = chdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "chdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ cwd = getcwd(buff, 255); ++ if (NULL == cwd) { ++ fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(newdname, dirname); ++ strcat(newdname, "new"); ++ ret = rename(dirname, newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rename failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = rmdir(newdname); ++ if (ret < 0) { ++ fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ rmdir(dirname); ++ return result; + } + +-int +-test_open_modes(char *filename) +-{ +- int ret = -1; +- int result = 0; +- +- ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); +- if (ret != 0) { +- fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_WRONLY, 0); +- if (ret != 0) { +- fprintf(stderr, "flag O_WRONLY failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDWR, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDWR failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_RDONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_RDONLY failed\n"); +- result |= ret; +- } ++int link_based_fops(char *filename) { ++ int ret = -1; ++ int result = 0; ++ int fd = 0; ++ char newname[255] = { ++ 0, ++ }; ++ char linkname[255] = { ++ 0, ++ }; ++ struct stat lstbuf = { ++ 0, ++ }; ++ ++ fd = creat(filename, 0644); ++ if (fd < 0) { ++ fd = 0; ++ fprintf(stderr, "creat failed: %s\n", strerror(errno)); ++ return ret; ++ } ++ ++ strcpy(newname, filename); ++ strcat(newname, "_hlink"); ++ ret = link(filename, newname); ++ if (ret < 0) { ++ fprintf(stderr, "link failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = unlink(filename); ++ if (ret < 0) { ++ fprintf(stderr, "unlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ strcpy(linkname, filename); ++ strcat(linkname, "_slink"); ++ ret = symlink(newname, linkname); ++ if (ret < 0) { ++ fprintf(stderr, "symlink failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lstat(linkname, &lstbuf); ++ if (ret < 0) { ++ fprintf(stderr, "lstbuf 
failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lchown(linkname, 10001, 10001); ++ if (ret < 0) { ++ fprintf(stderr, "lchown failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); ++ if (ret < 0) { ++ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = llistxattr(linkname, NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); ++ if (ret < 0) { ++ ret = -1; ++ fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ ret = lremovexattr(linkname, "trusted.lxattr-test"); ++ if (ret < 0) { ++ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); ++ result |= ret; ++ } ++ ++ if (fd) ++ close(fd); ++ unlink(linkname); ++ unlink(newname); ++ return result; ++} + +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); +- if (0 != ret) { +- fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); +- result |= ret; +- } ++int test_open_modes(char *filename) { ++ int ret = -1; ++ int result = 0; ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_WRONLY, 0); ++ if (ret != 0) { ++ fprintf(stderr, "flag O_WRONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDWR, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDWR failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_RDONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_RDONLY failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); ++ result |= ret; ++ } + + #if 0 /* undefined behaviour, unable to reliably test */ + ret = creat (filename, 0644); +@@ -943,90 +920,87 @@ test_open_modes(char *filename) + } + #endif + +- ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, +- OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); +- result |= ret; +- } +- +- ret = creat(filename, 0644); +- close(ret); +- ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); +- if (0 != ret) { +- fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); +- result |= ret; +- } +- +- return result; ++ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); ++ result |= ret; ++ } ++ ++ ret = creat(filename, 0644); ++ close(ret); ++ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); ++ if (0 != ret) { ++ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); ++ result 
|= ret; ++ } ++ ++ return result; + } + +-int +-generic_open_read_write(char *filename, int flag, mode_t mode) +-{ +- int fd = 0; +- int ret = -1; +- char wstring[50] = { +- 0, +- }; +- char rstring[50] = { +- 0, +- }; +- +- fd = open(filename, flag, mode); +- if (fd < 0) { +- if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { +- unlink(filename); +- return 0; +- } else { +- fprintf(stderr, "open failed: %s\n", strerror(errno)); +- return -1; +- } +- } +- +- strcpy(wstring, "My string to write\n"); +- ret = write(fd, wstring, strlen(wstring)); +- if (ret <= 0) { +- if (errno != EBADF) { +- fprintf(stderr, "write failed: %s\n", strerror(errno)); +- close(fd); +- unlink(filename); +- return ret; +- } +- } +- +- ret = lseek(fd, 0, SEEK_SET); +- if (ret < 0) { +- close(fd); +- unlink(filename); +- return ret; ++int generic_open_read_write(char *filename, int flag, mode_t mode) { ++ int fd = 0; ++ int ret = -1; ++ char wstring[50] = { ++ 0, ++ }; ++ char rstring[50] = { ++ 0, ++ }; ++ ++ fd = open(filename, flag, mode); ++ if (fd < 0) { ++ if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { ++ unlink(filename); ++ return 0; ++ } else { ++ fprintf(stderr, "open failed: %s\n", strerror(errno)); ++ return -1; + } ++ } + +- ret = read(fd, rstring, strlen(wstring)); +- if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && +- flag != (O_TRUNC | O_WRONLY)) { +- close(fd); +- unlink(filename); +- return ret; ++ strcpy(wstring, "My string to write\n"); ++ ret = write(fd, wstring, strlen(wstring)); ++ if (ret <= 0) { ++ if (errno != EBADF) { ++ fprintf(stderr, "write failed: %s\n", strerror(errno)); ++ close(fd); ++ unlink(filename); ++ return ret; + } ++ } + +- /* Compare the rstring with wstring. But we do not want to return +- * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or +- * O_TRUNC|O_RDONLY. Because in that case we are not writing +- * anything to the file.*/ +- +- ret = memcmp(wstring, rstring, strlen(wstring)); +- if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && +- flag != (O_CREAT | O_WRONLY) && +- !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || +- flag == (O_TRUNC | O_RDONLY))) { +- fprintf(stderr, "read is returning junk\n"); +- close(fd); +- unlink(filename); +- return ret; +- } ++ ret = lseek(fd, 0, SEEK_SET); ++ if (ret < 0) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } + ++ ret = read(fd, rstring, strlen(wstring)); ++ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_TRUNC | O_WRONLY)) { ++ close(fd); ++ unlink(filename); ++ return ret; ++ } ++ ++ /* Compare the rstring with wstring. But we do not want to return ++ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or ++ * O_TRUNC|O_RDONLY. 
Because in that case we are not writing ++ * anything to the file.*/ ++ ++ ret = memcmp(wstring, rstring, strlen(wstring)); ++ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && ++ flag != (O_CREAT | O_WRONLY) && ++ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || ++ flag == (O_TRUNC | O_RDONLY))) { ++ fprintf(stderr, "read is returning junk\n"); + close(fd); + unlink(filename); +- return 0; ++ return ret; ++ } ++ ++ close(fd); ++ unlink(filename); ++ return 0; + } +diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t +index 3343956..0e4a1bb 100644 +--- a/tests/basic/posix/shared-statfs.t ++++ b/tests/basic/posix/shared-statfs.t +@@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2 + TEST MOUNT_LOOP $LO1 $B0/${V0}1 + TEST MOUNT_LOOP $LO2 $B0/${V0}2 + ++total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}') ++#Account for rounding error ++brick_blocks_two_percent_less=$((total_brick_blocks*98/100)) + # Create a subdir in mountpoint and use that for volume. + TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') + # Keeping the size less than 200M mainly because XFS will use + # some storage in brick to keep its own metadata. +-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + + TEST force_umount $M0 +@@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/ + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count + TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +-total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] ++total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') ++TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] + + TEST force_umount $M0 + TEST $CLI volume stop $V0 +diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t +index 8e5d77b..e719fc5 100755 +--- a/tests/bugs/cli/bug-1320388.t ++++ b/tests/bugs/cli/bug-1320388.t +@@ -21,7 +21,7 @@ cleanup; + rm -f $SSL_BASE/glusterfs.* + touch "$GLUSTERD_WORKDIR"/secure-access + +-TEST openssl genrsa -out $SSL_KEY 3072 ++TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t +index d10fd9f..26d196e 100644 +--- a/tests/bugs/fuse/bug-985074.t ++++ b/tests/bugs/fuse/bug-985074.t +@@ -30,7 +30,7 @@ TEST glusterd + + TEST $CLI volume create $V0 $H0:$B0/$V0 + TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 md-cache-timeout 3 ++TEST $CLI volume set $V0 performance.stat-prefetch off + + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0 + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0 +@@ -40,8 +40,6 @@ TEST ln $M0/file $M0/file.link + TEST ls -ali $M0 $M1 + TEST rm -f $M1/file.link + TEST ls -ali $M0 $M1 +-# expire the md-cache timeout +-sleep 3 + TEST mv $M0/file $M0/file.link + TEST stat $M0/file.link + 
TEST ! stat $M0/file
+diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t
+deleted file mode 100755
+index aaf6362..0000000
+--- a/tests/bugs/glusterd/quorum-value-check.t
++++ /dev/null
+@@ -1,35 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-
+-function check_quorum_nfs() {
+-    local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')"
+-    local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')"
+-
+-    if [ $qnfs = $qinfo ]; then
+-        echo "Y"
+-    else
+-        echo "N"
+-    fi
+-}
+-
+-cleanup;
+-
+-TEST glusterd
+-TEST pidof glusterd
+-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+-TEST $CLI volume set $V0 nfs.disable off
+-TEST $CLI volume set $V0 performance.write-behind off
+-TEST $CLI volume set $V0 cluster.self-heal-daemon off
+-TEST $CLI volume set $V0 cluster.quorum-type fixed
+-TEST $CLI volume start $V0
+-
+-TEST $CLI volume set $V0 cluster.quorum-count 1
+-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+-TEST $CLI volume set $V0 cluster.quorum-count 2
+-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+-TEST $CLI volume set $V0 cluster.quorum-count 3
+-EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+-
+-cleanup;
+diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t
+index 82f7cca..f65b1bd 100755
+--- a/tests/bugs/glusterfs-server/bug-887145.t
++++ b/tests/bugs/glusterfs-server/bug-887145.t
+@@ -29,7 +29,15 @@ chmod 600 $M0/file;
+
+ TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+-chown -R nfsnobody:nfsnobody $M0/dir;
++grep nfsnobody /etc/passwd > /dev/null
++if [ $? -eq 1 ]; then
++usr=nobody
++grp=nobody
++else
++usr=nfsnobody
++grp=nfsnobody
++fi
++chown -R $usr:$grp $M0/dir;
+ chown -R tmp_user:tmp_user $M0/other;
+
+ TEST $CLI volume set $V0 server.root-squash on;
+@@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+ # create files and directories in the root of the glusterfs and nfs mount
+ # which is owned by root and hence the right behavior is getting EACCESS
+-# as the fops are executed as nfsnobody.
++# as the fops are executed as nfsnobody/nobody.
+ touch $M0/foo 2>/dev/null;
+ TEST [ $? -ne 0 ]
+ touch $N0/foo 2>/dev/null;
+@@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null;
+ TEST [ $? -eq 0 ]
+
+ # create files and directories should succeed as the fops are being executed
+-# inside the directory owned by nfsnobody
++# inside the directory owned by nfsnobody/nobody
+ TEST touch $M0/dir/file;
+ TEST touch $N0/dir/foo;
+ TEST mkdir $M0/dir/new;
+diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t
+deleted file mode 100755
+index 2f53172..0000000
+--- a/tests/bugs/nfs/bug-1053579.t
++++ /dev/null
+@@ -1,114 +0,0 @@
+-#!/bin/bash
+-
+-. $(dirname $0)/../../include.rc
+-. $(dirname $0)/../../volume.rc
+-.
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup +- +-# prepare the users and groups +-NEW_USER=bug1053579 +-NEW_UID=1053579 +-NEW_GID=1053579 +-LAST_GID=1053779 +-NEW_GIDS=${NEW_GID} +- +-# OS-specific overrides +-case $OSTYPE in +-NetBSD|Darwin) +- # only NGROUPS_MAX=16 secondary groups are supported +- LAST_GID=1053593 +- ;; +-FreeBSD) +- # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups +- ;; +-Linux) +- # NGROUPS_MAX=65536, we can afford 200 groups +- ;; +-*) +- ;; +-esac +- +-# create a user that belongs to many groups +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupadd -o -g ${GID} ${NEW_USER}-${GID} +- NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}" +-done +-TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} +- +-# preparation done, start the tests +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/${V0}1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.server-aux-gids on +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-# mount the volume +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +- +-# the actual test, this used to crash +-su -m ${NEW_USER} -c "stat $N0/. > /dev/null" +-TEST [ $? -eq 0 ] +- +-# create a file that only a user in a high-group can access +-echo 'Hello World!' > $N0/README +-chgrp ${LAST_GID} $N0/README +-chmod 0640 $N0/README +- +-#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-su -m ${NEW_USER} -c "cat $N0/README" +-ret=$? +- +-case $OSTYPE in +-Linux) # Linux NFS fails with big GID +- if [ $ret -ne 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-*) # Other systems should cope better +- if [ $ret -eq 0 ] ; then +- res="Y" +- else +- res="N" +- fi +- ;; +-esac +-TEST [ "x$res" = "xY" ] +- +-# This passes only on build.gluster.org, not reproducible on other machines?! +-#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-#TEST [ $? -ne 0 ] +- +-# enable server.manage-gids and things should work +-TEST $CLI volume set $V0 server.manage-gids on +- +-su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +-su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" +-TEST [ $? -eq 0 ] +- +-# cleanup +-userdel --force ${NEW_USER} +-for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) +-do +- groupdel ${NEW_USER}-${GID} +-done +- +-rm -f $N0/README +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +- +-TEST $CLI volume stop $V0 +-TEST $CLI volume delete $V0 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t +deleted file mode 100644 +index dd3998d..0000000 +--- a/tests/bugs/nfs/bug-1116503.t ++++ /dev/null +@@ -1,47 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting NFS over UDP (MOUNT service only) works. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 nfs.mount-udp on +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp; +-TEST mkdir -p $N0/foo/bar +-TEST ls $N0/foo +-TEST ls $N0/foo/bar +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.addr-namelookup on +-TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t +deleted file mode 100644 +index c360db4..0000000 +--- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t ++++ /dev/null +@@ -1,24 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume set $V0 performance.open-behind off +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/foo +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-TEST mount_nfs $H0:/$V0/foo $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +-cleanup +diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t +deleted file mode 100644 +index dea609e..0000000 +--- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t ++++ /dev/null +@@ -1,126 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-## Start and create a volume +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume info; +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0; +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount NFS +-TEST mount_nfs $H0:/$V0 $N0 nolock; +- +-mkdir $N0/dir1; +-mkdir $N0/dir2; +-pushd $N0/ ; +- +-##link created using relative path +-ln -s dir1 symlink1; +- +-##relative path contains ".." +-ln -s ../dir1 dir2/symlink2; +- +-##link created using absolute path +-ln -s $N0/dir1 symlink3; +- +-##link pointing to another symlinks +-ln -s symlink1 symlink4 +-ln -s symlink3 symlink5 +- +-##dead links +-ln -s does/not/exist symlink6 +- +-##link which contains ".." 
points out of glusterfs +-ln -s ../../ symlink7 +- +-##links pointing to unauthorized area +-ln -s .glusterfs symlink8 +- +-popd ; +- +-##Umount the volume +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock; +- +-## Mount NFS via symlink7 should fail +-TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock; +- +-## Mount NFS via symlink8 should fail +-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock; +- +-##Similar check for udp mount +-$CLI volume stop $V0 +-TEST $CLI volume set $V0 nfs.mount-udp on +-$CLI volume start $V0 +- +-## Wait for volume to register with rpc.mountd +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-## Mount and umount NFS via directory +-TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink1 +-TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount and umount NFS via symlink2 +-TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink3 should fail +-TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount and umount NFS via symlink4 +-TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp; +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 +- +-## Mount NFS via symlink5 should fail +-TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp; +- +-## Mount NFS via symlink6 should fail +-TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp; +- +-##symlink7 is not check here, because in udp mount ../../ resolves into root '/' +- +-## Mount NFS via symlink8 should fail +-TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp; +- +-rm -rf $H0:$B0/ +-cleanup; +diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t +deleted file mode 100644 +index 45a22e7..0000000 +--- a/tests/bugs/nfs/bug-1161092-nfs-acls.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume info +- +-TEST $CLI volume create $V0 $H0:$B0/brick1; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +-TEST mount_nfs $H0:/$V0 $N0 +- +-TEST touch $N0/file1 +-TEST chmod 700 $N0/file1 +-TEST getfacl $N0/file1 +- +-TEST $CLI volume set $V0 root-squash on +-TEST getfacl $N0/file1 +- +-TEST umount_nfs $H0:/$V0 $N0 +-TEST mount_nfs $H0:/$V0 $N0 +-TEST getfacl $N0/file1 +- +-## Before killing daemon to avoid deadlocks +-umount_nfs $N0 +- +-cleanup; +- +diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t +deleted file mode 100755 +index c4f51a2..0000000 +--- a/tests/bugs/nfs/bug-1166862.t ++++ /dev/null +@@ -1,69 +0,0 @@ +-#!/bin/bash +-# +-# When nfs.mount-rmtab is disabled, it should not get updated. +-# +-# Based on: bug-904065.t +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -n "$1" ] +- then +- $@ 2>/dev/null | wc -l +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# disable the rmtab by settting it to the magic "/-" value +-TEST $CLI volume set $V0 nfs.mount-rmtab /- +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# showmount should list one client +-EXPECT '1' count_lines showmount --no-headers $H0 +- +-# unmount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-# after resetting the option, the rmtab should get updated again +-TEST $CLI volume reset $V0 nfs.mount-rmtab +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c +deleted file mode 100644 +index d409924..0000000 +--- a/tests/bugs/nfs/bug-1210338.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-#include <stdio.h> +-#include <stdlib.h> +-#include <unistd.h> +-#include <string.h> +-#include <errno.h> +-#include <sys/types.h> +-#include <fcntl.h> +-#include <sys/stat.h> +- +-int +-main(int argc, char *argv[]) +-{ +- int ret = -1; +- int fd = -1; +- +- fd = open(argv[1], O_CREAT | O_EXCL, 0644); +- +- if (fd == -1) { +- fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1], +- strerror(errno)); +- goto out; +- } +- +- ret = 0; +- +-out: +- if (fd > 0) +- close(fd); +- +- return ret; +-} +diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t +deleted 
file mode 100644 +index b5c9245..0000000 +--- a/tests/bugs/nfs/bug-1210338.t ++++ /dev/null +@@ -1,30 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-NFS_SOURCE=$(dirname $0)/bug-1210338.c +-NFS_EXEC=$(dirname $0)/excl_create +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-build_tester $NFS_SOURCE -o $NFS_EXEC +-TEST [ -e $NFS_EXEC ] +- +-TEST $NFS_EXEC $N0/my_file +- +-rm -f $NFS_EXEC; +- +-cleanup +diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t +deleted file mode 100755 +index a2fb0e6..0000000 +--- a/tests/bugs/nfs/bug-1302948.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.rdirplus option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.rdirplus off +-TEST $CLI volume set $V0 nfs.rdirplus on +-cleanup +diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t +deleted file mode 100755 +index 5ccee72..0000000 +--- a/tests/bugs/nfs/bug-847622.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-case $OSTYPE in +-NetBSD) +- echo "Skip test on ACL which are not available on NetBSD" >&2 +- SKIP_TESTS +- exit 0 +- ;; +-*) +- ;; +-esac +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 $H0:$B0/brick0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-# simple getfacl setfacl commands +-TEST touch testfile +-TEST setfacl -m u:14:r testfile +-TEST getfacl testfile +- +-cd +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +- +diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t +deleted file mode 100755 +index dca315a..0000000 +--- a/tests/bugs/nfs/bug-877885.t ++++ /dev/null +@@ -1,39 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. 
$(dirname $0)/../../volume.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +- +-## Mount FUSE with caching disabled +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \ +-$M0; +- +-TEST touch $M0/file +-TEST mkdir $M0/dir +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +-cd $N0 +- +-rm -rf * & +- +-TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock; +- +-cd; +- +-kill %1; +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +- +-cleanup +diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t +deleted file mode 100755 +index 0eba86e..0000000 +--- a/tests/bugs/nfs/bug-904065.t ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# This test does not use 'showmount' from the nfs-utils package, it would +-# require setting up a portmapper (either rpcbind or portmap, depending on the +-# Linux distribution used for testing). The persistancy of the rmtab should not +-# affect the current showmount outputs, so existing regression tests should be +-# sufficient. +-# +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-# count the lines of a file, return 0 if the file does not exist +-function count_lines() +-{ +- if [ -e "$1" ] +- then +- wc -l < $1 +- else +- echo 0 +- fi +-} +- +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# before mounting the rmtab should be empty +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-# the output would looks similar to: +-# +-# hostname-0=172.31.122.104 +-# mountpoint-0=/ufo +-# +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# duplicate mounts should not be recorded (client could have crashed) +-TEST mount_nfs $H0:/$V0 $N1 nolock +-EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# removing a mount should (even if there are two) should remove the entry +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-# unmounting the other mount should work flawlessly +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab +- +-TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0 +- +-# we'll create a fake rmtab here, similar to how an other storage server would do +-# using an invalid IP address to prevent (unlikely) collisions on the test-machine +-cat << EOF > $M0/rmtab +-hostname-0=127.0.0.256 +-mountpoint-0=/ufo +-EOF +-EXPECT '2' count_lines $M0/rmtab +- +-# reconfigure merges the rmtab with the one on the volume +-TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab +- +-# glusterfs/nfs needs some time to restart +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# Apparently "is_nfs_export_available" might return even if the export is +-# not, in 
fact, available. (eyeroll) Give it a bit of extra time. +-# +-# TBD: fix the broken shell function instead of working around it here +-sleep 5 +- +-# a new mount should be added to the rmtab, not overwrite exiting ones +-TEST mount_nfs $H0:/$V0 $N0 nolock +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-EXPECT '2' count_lines $M0/rmtab +- +-# TODO: nfs/reconfigure() is never called and is therefor disabled. When the +-# NFS-server supports reloading and does not get restarted anymore, we should +-# add a test that includes the merging of entries in the old rmtab with the new +-# rmtab. +- +-cleanup +diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t +deleted file mode 100755 +index bd27915..0000000 +--- a/tests/bugs/nfs/bug-915280.t ++++ /dev/null +@@ -1,54 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-function volinfo_field() +-{ +- local vol=$1; +- local field=$2; +- +- $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; +-} +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2; +-EXPECT 'Created' volinfo_field $V0 'Status'; +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status'; +- +-MOUNTDIR=$N0; +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1 +-TEST touch $N0/testfile +- +-TEST $CLI volume set $V0 debug.error-gen client +-TEST $CLI volume set $V0 debug.error-fops stat +-TEST $CLI volume set $V0 debug.error-failure 100 +- +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +- +-pid_file=$(read_nfs_pidfile); +- +-getfacl $N0/testfile 2>/dev/null +- +-nfs_pid=$(get_nfs_pid); +-if [ ! $nfs_pid ] +-then +- nfs_pid=0; +-fi +- +-TEST [ $nfs_pid -eq $pid_file ] +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR +- +-cleanup; +diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t +deleted file mode 100755 +index 61be484..0000000 +--- a/tests/bugs/nfs/bug-970070.t ++++ /dev/null +@@ -1,13 +0,0 @@ +-#!/bin/bash +-# TEST the nfs.acl option +-. $(dirname $0)/../../include.rc +- +-cleanup +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume start $V0 +-TEST $CLI volume set $V0 nfs.acl off +-TEST $CLI volume set $V0 nfs.acl on +-cleanup +diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t +deleted file mode 100755 +index 975c46f..0000000 +--- a/tests/bugs/nfs/bug-974972.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-#This script checks that nfs mount does not fail lookup on files with split-brain +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-TEST $CLI volume set $V0 self-heal-daemon off +-TEST $CLI volume set $V0 cluster.eager-lock off +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 +-TEST touch $N0/1 +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}1 +-echo abc > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-TEST kill_brick ${V0} ${H0} ${B0}/${V0}0 +-echo def > $N0/1 +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 +- +-#Lookup should not fail +-TEST ls $N0/1 +-TEST ! cat $N0/1 +- +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +-cleanup +diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t +deleted file mode 100644 +index f1b6859..0000000 +--- a/tests/bugs/nfs/showmount-many-clients.t ++++ /dev/null +@@ -1,41 +0,0 @@ +-#!/bin/bash +-# +-# The nfs.rpc-auth-allow volume option is used to generate the list of clients +-# that are displayed as able to mount the export. The "group" in the export +-# should be a list of all clients, identified by "name". In previous versions, +-# the "name" was the copied string from nfs.rpc-auth-allow. This is not +-# correct, as the volume option should be parsed and split into different +-# groups. +-# +-# When the single string is passed, this testcase fails when the +-# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting +-# the groups into their own structures, this testcase passes. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../nfs.rc +-. $(dirname $0)/../../volume.rc +- +-cleanup +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/brick1 +-EXPECT 'Created' volinfo_field $V0 'Status' +-TEST $CLI volume set $V0 nfs.disable false +- +-CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,) +-TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS} +-TEST $CLI volume set $V0 nfs.rpc-auth-reject all +- +-TEST $CLI volume start $V0; +-EXPECT 'Started' volinfo_field $V0 'Status' +- +-# glusterfs/nfs needs some time to start up in the background +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available +- +-# showmount should not timeout (no reply is sent on error) +-TEST showmount -e $H0 +- +-cleanup +diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py +deleted file mode 100755 +index eb507e1..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.py ++++ /dev/null +@@ -1,33 +0,0 @@ +-# +-# Create a unix domain socket and test if it is a socket (and not a fifo/pipe). 
+-# +-# Author: Niels de Vos <ndevos@redhat.com> +-# +- +-from __future__ import print_function +-import os +-import stat +-import sys +-import socket +- +-ret = 1 +- +-if len(sys.argv) != 2: +- print('Usage: %s <socket>' % (sys.argv[0])) +- sys.exit(ret) +- +-path = sys.argv[1] +- +-sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) +-sock.bind(path) +- +-stbuf = os.stat(path) +-mode = stbuf.st_mode +- +-if stat.S_ISSOCK(mode): +- ret = 0 +- +-sock.close() +-os.unlink(path) +- +-sys.exit(ret) +diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t +deleted file mode 100644 +index d9b9e95..0000000 +--- a/tests/bugs/nfs/socket-as-fifo.t ++++ /dev/null +@@ -1,25 +0,0 @@ +-#!/bin/bash +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +- +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +-TEST mount_nfs $H0:/$V0 $N0 nolock +- +-# this is the actual test +-TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket +- +-TEST umount_nfs $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t +deleted file mode 100644 +index 6a11487..0000000 +--- a/tests/bugs/nfs/subdir-trailing-slash.t ++++ /dev/null +@@ -1,32 +0,0 @@ +-#!/bin/bash +-# +-# Verify that mounting a subdir over NFS works, even with a trailing / +-# +-# For example: +-# mount -t nfs server.example.com:/volume/subdir/ +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. $(dirname $0)/../../nfs.rc +- +-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST +- +-cleanup; +-TEST glusterd +-TEST pidof glusterd +- +-TEST $CLI volume create $V0 $H0:$B0/$V0 +-TEST $CLI volume set $V0 nfs.disable false +- +-TEST $CLI volume start $V0 +-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available +- +-TEST mount_nfs $H0:/$V0 $N0 nolock +-TEST mkdir -p $N0/subdir +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock +-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +- +-cleanup +diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t +deleted file mode 100755 +index 2a94009..0000000 +--- a/tests/bugs/nfs/zero-atime.t ++++ /dev/null +@@ -1,33 +0,0 @@ +-#!/bin/bash +-# +-# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If +-# not set, these values are 0 and cause a atime/mtime set to the Epoch. +-# +- +-. $(dirname $0)/../../include.rc +-. $(dirname $0)/../../volume.rc +-. 
$(dirname $0)/../../nfs.rc
+-
+-#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+-
+-cleanup
+-
+-TEST glusterd
+-TEST pidof glusterd
+-
+-TEST $CLI volume create $V0 $H0:$B0/$V0
+-TEST $CLI volume set $V0 nfs.disable false
+-TEST $CLI volume start $V0
+-EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+-TEST mount_nfs $H0:/$V0 $N0 nolock
+-
+-# create a file for testing
+-TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+-
+-# timezone in UTC results in atime=0 if not set correctly
+-TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat
+-TEST [ "$(stat --format=%X $M0/small)" != "0" ]
+-
+-TEST rm $M0/small
+-
+-cleanup
+diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t
+index 65af274..9ad0ab2 100755
+--- a/tests/bugs/rpc/bug-954057.t
++++ b/tests/bugs/rpc/bug-954057.t
+@@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ 
+ TEST mkdir $M0/dir
+ TEST mkdir $M0/nobody
+-TEST chown nfsnobody:nfsnobody $M0/nobody
++grep nfsnobody /etc/passwd > /dev/null
++if [ $? -eq 1 ]; then
++    usr=nobody
++    grp=nobody
++else
++    usr=nfsnobody
++    grp=nfsnobody
++fi
++TEST chown $usr:$grp $M0/nobody
+ TEST `echo "file" >> $M0/file`
+ TEST cp $M0/file $M0/new
+ TEST chmod 700 $M0/new
+diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t
+index 7628870..66e896a 100644
+--- a/tests/bugs/shard/bug-1272986.t
++++ b/tests/bugs/shard/bug-1272986.t
+@@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+ 
+ # Write some data into a file, such that its size crosses the shard block size.
+-TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc
++TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct
+ 
+ md5sum1_reader=$(md5sum $M0/file | awk '{print $1}')
+ 
+ EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'`
+ 
+ # Append some more data into the file.
+-TEST `echo "abcdefg" >> $M1/file`
++TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct
+ 
+-md5sum2_reader=$(md5sum $M0/file | awk '{print $1}')
++md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k | md5sum | awk '{print $1}')
+ 
+ # Test to see if the reader refreshes its cache correctly as part of the reads
+ # triggered through md5sum. If it does, then the md5sum on the reader and writer
+diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t
+index d4c0702..8070bc1 100755
+--- a/tests/bugs/transport/bug-873367.t
++++ b/tests/bugs/transport/bug-873367.t
+@@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.*
+ mkdir -p $B0/1
+ mkdir -p $M0
+ 
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+ 
+diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
+index 3cb45b5..cae010c 100755
+--- a/tests/features/ssl-authz.t
++++ b/tests/features/ssl-authz.t
+@@ -41,7 +41,7 @@ function valid_ciphers {
+ -e '/:$/s///'
+ }
+ 
+-TEST openssl genrsa -out $SSL_KEY 1024
++TEST openssl genrsa -out $SSL_KEY 2048
+ TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+ 
+diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
+index 7e1e199..e4bcdf5 100644
+--- a/tests/features/ssl-ciphers.t
++++ b/tests/features/ssl-ciphers.t
+@@ -33,18 +33,26 @@ wait_mount() {
+ openssl_connect() {
+ ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA"
+ ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR"
+- #echo openssl s_client $ssl_opt $@ > /dev/tty
+- #read -p "Continue? " nothing
+- CIPHER=`echo "" |
+- openssl s_client $ssl_opt $@ 2>/dev/null |
+- awk '/^ Cipher/{print $3}'`
+- if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then
++ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null"
++ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
++ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+ }
+ 
++# Validate the cipher to determine the expected EXPECT value before calling openssl_connect
++check_cipher() {
++ cmd="echo "" | openssl s_client $@ 2> /dev/null"
++ cipher=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
++ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then
++ echo "N"
++ else
++ echo "Y"
++ fi
++}
++
+ cleanup;
+ mkdir -p $B0
+ mkdir -p $M0
+@@ -65,7 +73,7 @@ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info;
+ 
+-TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null
++TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null
+ TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \
+ -subj /CN=CA -out $SSL_CA
+ TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \
+@@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT
+ EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT
+ 
+ # Test a HIGH CBC cipher
+-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+ 
+ # Test EECDH
+-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+ 
+ # test MD5 fails
+-EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT`
++EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
+ 
+ # test RC4 fails
+-EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
++cph=`check_cipher -cipher RC4-SHA -connect 
$H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT + + # test eNULL fails +-EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT + + # test SHA2 +-EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT + + # test GCM +-EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT + + # Test DH fails without DH params +-EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT + + # Test DH with DH params + TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem +@@ -145,8 +161,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT + + # Test the ec-curve option + TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1 +@@ -155,8 +173,10 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +-EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve invalid + EXPECT invalid volume_option $V0 ssl.ec-curve +@@ -164,7 +184,8 @@ TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + BRICK_PORT=`brick_port $V0` +-EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT ++cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` ++EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT + + TEST $CLI volume set $V0 ssl.ec-curve secp521r1 + EXPECT secp521r1 volume_option $V0 ssl.ec-curve +diff --git a/tests/ssl.rc b/tests/ssl.rc +index 127f83f..b1ccc4c 100644 +--- a/tests/ssl.rc ++++ b/tests/ssl.rc +@@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca + + # Create self-signed certificates + function create_self_signed_certs (){ +- openssl genrsa -out $SSL_KEY 1024 ++ openssl genrsa -out $SSL_KEY 2048 + openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + return $? 
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b248767..b224abd 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -10,6883 +10,6417 @@ + + #include <unistd.h> + +-#include "shard.h" + #include "shard-mem-types.h" ++#include "shard.h" + #include <glusterfs/byte-order.h> + #include <glusterfs/defaults.h> + #include <glusterfs/statedump.h> + +-static gf_boolean_t +-__is_shard_dir(uuid_t gfid) +-{ +- shard_priv_t *priv = THIS->private; ++static gf_boolean_t __is_shard_dir(uuid_t gfid) { ++ shard_priv_t *priv = THIS->private; + +- if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) +- return _gf_true; ++ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-static gf_boolean_t +-__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) +-{ +- if (frame->root->pid == GF_CLIENT_PID_GSYNCD && +- (__is_shard_dir(loc->pargfid) || +- (loc->parent && __is_shard_dir(loc->parent->gfid)))) +- return _gf_true; ++static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { ++ if (frame->root->pid == GF_CLIENT_PID_GSYNCD && ++ (__is_shard_dir(loc->pargfid) || ++ (loc->parent && __is_shard_dir(loc->parent->gfid)))) ++ return _gf_true; + +- return _gf_false; ++ return _gf_false; + } + +-void +-shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(buf, len, "%s.%d", gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(buf, len, "%s.%d", gfid_str, block_num); + } + +-void +-shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; ++void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, ++ size_t len) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; + +- gf_uuid_unparse(gfid, gfid_str); +- snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); ++ gf_uuid_unparse(gfid, gfid_str); ++ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + } + +-int +-__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx_p = NULL; ++int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx_p = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret == 0) { +- *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; +- return ret; +- } ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret == 0) { ++ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ return ret; ++ } + +- ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); +- if (!ctx_p) +- return ret; ++ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); ++ if (!ctx_p) ++ return ret; + +- INIT_LIST_HEAD(&ctx_p->ilist); +- INIT_LIST_HEAD(&ctx_p->to_fsync_list); ++ INIT_LIST_HEAD(&ctx_p->ilist); ++ INIT_LIST_HEAD(&ctx_p->to_fsync_list); + +- ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); +- if (ret < 0) { +- GF_FREE(ctx_p); +- return ret; +- } ++ ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); ++ if (ret < 0) { ++ GF_FREE(ctx_p); ++ return ret; ++ } + 
+- *ctx = ctx_p; ++ *ctx = ctx_p; + +- return ret; ++ return ret; + } + +-int +-shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +-{ +- int ret = 0; ++int shard_inode_ctx_get(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t **ctx) { ++ int ret = 0; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get(inode, this, ctx); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get(inode, this, ctx); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if (valid & SHARD_MASK_BLOCK_SIZE) +- ctx->block_size = block_size; ++ if (valid & SHARD_MASK_BLOCK_SIZE) ++ ctx->block_size = block_size; + +- if (valid & SHARD_MASK_PROT) +- ctx->stat.ia_prot = stbuf->ia_prot; ++ if (valid & SHARD_MASK_PROT) ++ ctx->stat.ia_prot = stbuf->ia_prot; + +- if (valid & SHARD_MASK_NLINK) +- ctx->stat.ia_nlink = stbuf->ia_nlink; ++ if (valid & SHARD_MASK_NLINK) ++ ctx->stat.ia_nlink = stbuf->ia_nlink; + +- if (valid & SHARD_MASK_UID) +- ctx->stat.ia_uid = stbuf->ia_uid; ++ if (valid & SHARD_MASK_UID) ++ ctx->stat.ia_uid = stbuf->ia_uid; + +- if (valid & SHARD_MASK_GID) +- ctx->stat.ia_gid = stbuf->ia_gid; ++ if (valid & SHARD_MASK_GID) ++ ctx->stat.ia_gid = stbuf->ia_gid; + +- if (valid & SHARD_MASK_SIZE) +- ctx->stat.ia_size = stbuf->ia_size; ++ if (valid & SHARD_MASK_SIZE) ++ ctx->stat.ia_size = stbuf->ia_size; + +- if (valid & SHARD_MASK_BLOCKS) +- ctx->stat.ia_blocks = stbuf->ia_blocks; ++ if (valid & SHARD_MASK_BLOCKS) ++ ctx->stat.ia_blocks = stbuf->ia_blocks; + +- if (valid & SHARD_MASK_TIMES) { +- SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, +- stbuf->ia_mtime, stbuf->ia_mtime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, +- stbuf->ia_ctime, stbuf->ia_ctime_nsec); +- SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, +- stbuf->ia_atime, stbuf->ia_atime_nsec); +- } ++ if (valid & SHARD_MASK_TIMES) { ++ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, ++ stbuf->ia_mtime, stbuf->ia_mtime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec); ++ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, ++ stbuf->ia_atime, stbuf->ia_atime_nsec); ++ } + +- if (valid & SHARD_MASK_OTHERS) { +- ctx->stat.ia_ino = stbuf->ia_ino; +- gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); +- ctx->stat.ia_dev = stbuf->ia_dev; +- ctx->stat.ia_type = stbuf->ia_type; +- ctx->stat.ia_rdev = stbuf->ia_rdev; +- ctx->stat.ia_blksize = stbuf->ia_blksize; +- } ++ if (valid & SHARD_MASK_OTHERS) { ++ ctx->stat.ia_ino = stbuf->ia_ino; ++ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); ++ ctx->stat.ia_dev = stbuf->ia_dev; ++ ctx->stat.ia_type = stbuf->ia_type; ++ ctx->stat.ia_rdev = stbuf->ia_rdev; ++ ctx->stat.ia_blksize = stbuf->ia_blksize; ++ } + +- if (valid & SHARD_MASK_REFRESH_RESET) +- ctx->refresh = _gf_false; ++ if (valid & SHARD_MASK_REFRESH_RESET) ++ ctx->refresh = _gf_false; + +- return 0; ++ return 0; + } + +-int 
+-shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, +- uint64_t block_size, int32_t valid) +-{ +- int ret = -1; ++int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, ++ uint64_t block_size, int32_t valid) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refresh = _gf_true; ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } +-int +-shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_set_refresh_flag(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- ctx->refreshed = _gf_true; +- return 0; ++ ctx->refreshed = _gf_true; ++ return 0; + } + +-int +-shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; ++int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; +- shard_inode_ctx_t *base_ictx = NULL; +- shard_inode_ctx_t *shard_ictx = NULL; +- +- ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); +- if (ret) +- return ret; ++int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; ++ shard_inode_ctx_t *base_ictx = NULL; ++ shard_inode_ctx_t *shard_ictx = NULL; + +- ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ if (ret) ++ return ret; + +- if (shard_ictx->fsync_needed) { +- shard_ictx->fsync_needed++; +- return 1; +- } ++ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); ++ if (ret) ++ return ret; + +- list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); +- shard_ictx->inode = shard_inode; ++ if (shard_ictx->fsync_needed) { + shard_ictx->fsync_needed++; +- base_ictx->fsync_count++; 
+- shard_ictx->base_inode = base_inode; ++ return 1; ++ } + +- return 0; ++ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); ++ shard_ictx->inode = shard_inode; ++ shard_ictx->fsync_needed++; ++ base_ictx->fsync_count++; ++ shard_ictx->base_inode = base_inode; ++ ++ return 0; + } + +-int +-shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, +- inode_t *shard_inode) +-{ +- int ret = -1; ++int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, ++ inode_t *shard_inode) { ++ int ret = -1; + +- /* This ref acts as a refkeepr on the base inode. We +- * need to keep this inode alive as it holds the head +- * of the to_fsync_list. +- */ +- inode_ref(base_inode); +- inode_ref(shard_inode); ++ /* This ref acts as a refkeepr on the base inode. We ++ * need to keep this inode alive as it holds the head ++ * of the to_fsync_list. ++ */ ++ inode_ref(base_inode); ++ inode_ref(shard_inode); + +- LOCK(&base_inode->lock); +- LOCK(&shard_inode->lock); +- { +- ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, +- shard_inode); +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&base_inode->lock); ++ LOCK(&base_inode->lock); ++ LOCK(&shard_inode->lock); ++ { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&base_inode->lock); + +- /* Unref the base inode corresponding to the ref above, if the shard is +- * found to be already part of the fsync list. +- */ +- if (ret != 0) { +- inode_unref(base_inode); +- inode_unref(shard_inode); +- } +- return ret; ++ /* Unref the base inode corresponding to the ref above, if the shard is ++ * found to be already part of the fsync list. ++ */ ++ if (ret != 0) { ++ inode_unref(base_inode); ++ inode_unref(shard_inode); ++ } ++ return ret; + } + +-gf_boolean_t +-__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- /* If inode ctx get fails, better to err on the side of caution and +- * try again? Unless the failure is due to mem-allocation. +- */ +- if (ret) +- return _gf_true; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ /* If inode ctx get fails, better to err on the side of caution and ++ * try again? Unless the failure is due to mem-allocation. 
++ */ ++ if (ret) ++ return _gf_true; + +- return !ctx->refreshed; ++ return !ctx->refreshed; + } + +-gf_boolean_t +-shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +-{ +- gf_boolean_t flag = _gf_false; ++gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { ++ gf_boolean_t flag = _gf_false; + +- LOCK(&inode->lock); +- { +- flag = __shard_inode_ctx_needs_lookup(inode, this); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { flag = __shard_inode_ctx_needs_lookup(inode, this); } ++ UNLOCK(&inode->lock); + +- return flag; ++ return flag; + } +-int +-__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __shard_inode_ctx_get(inode, this, &ctx); +- if (ret) +- return ret; ++ ret = __shard_inode_ctx_get(inode, this, &ctx); ++ if (ret) ++ return ret; + +- if ((stbuf->ia_size != ctx->stat.ia_size) || +- (stbuf->ia_blocks != ctx->stat.ia_blocks)) +- ctx->refresh = _gf_true; ++ if ((stbuf->ia_size != ctx->stat.ia_size) || ++ (stbuf->ia_blocks != ctx->stat.ia_blocks)) ++ ctx->refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +-{ +- int ret = -1; ++int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, ++ struct iatt *stbuf) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_invalidate(inode, this, stbuf); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *block_size = ctx->block_size; ++ *block_size = ctx->block_size; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, +- uint64_t *block_size) +-{ +- int ret = -1; ++int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, ++ uint64_t *block_size) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_block_size(inode, this, block_size); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- 
ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- *fsync_count = ctx->fsync_needed; ++ *fsync_count = ctx->fsync_needed; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, +- int *fsync_count) +-{ +- int ret = -1; ++int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, ++ int *fsync_count) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } +-int +-__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); +- return 0; ++ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); ++ return 0; + } + +-int +-shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, +- shard_inode_ctx_t *ctx_out) +-{ +- int ret = -1; ++int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, ++ shard_inode_ctx_t *ctx_out) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_get_all(inode, this, ctx_out); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-int +-__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ ret = __inode_ctx_get(inode, this, &ctx_uint); ++ if (ret < 0) ++ return ret; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- if (ctx->refresh == _gf_false) +- *buf = ctx->stat; +- else +- *need_refresh = _gf_true; ++ if (ctx->refresh == _gf_false) ++ *buf = ctx->stat; ++ else ++ *need_refresh = _gf_true; + +- return 0; ++ return 0; + } + +-int +-shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, +- struct iatt *buf, +- gf_boolean_t *need_refresh) +-{ +- int ret = -1; ++int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, ++ struct iatt *buf, ++ gf_boolean_t *need_refresh) { ++ int ret = -1; + +- LOCK(&inode->lock); +- { +- ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, +- need_refresh); +- } +- UNLOCK(&inode->lock); ++ LOCK(&inode->lock); ++ { ++ ret = ++ __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); ++ } ++ UNLOCK(&inode->lock); + +- return ret; ++ return ret; + } + +-void +-shard_local_wipe(shard_local_t *local) +-{ 
+- int i = 0; +- int count = 0; +- +- count = local->num_blocks; +- +- syncbarrier_destroy(&local->barrier); +- loc_wipe(&local->loc); +- loc_wipe(&local->dot_shard_loc); +- loc_wipe(&local->dot_shard_rm_loc); +- loc_wipe(&local->loc2); +- loc_wipe(&local->tmp_loc); +- loc_wipe(&local->int_inodelk.loc); +- loc_wipe(&local->int_entrylk.loc); +- loc_wipe(&local->newloc); +- +- if (local->int_entrylk.basename) +- GF_FREE(local->int_entrylk.basename); +- if (local->fd) +- fd_unref(local->fd); +- +- if (local->xattr_req) +- dict_unref(local->xattr_req); +- if (local->xattr_rsp) +- dict_unref(local->xattr_rsp); +- +- for (i = 0; i < count; i++) { +- if (!local->inode_list) +- break; +- +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- +- GF_FREE(local->inode_list); +- +- GF_FREE(local->vector); +- if (local->iobref) +- iobref_unref(local->iobref); +- if (local->list_inited) +- gf_dirent_free(&local->entries_head); +- if (local->inodelk_frame) +- SHARD_STACK_DESTROY(local->inodelk_frame); +- if (local->entrylk_frame) +- SHARD_STACK_DESTROY(local->entrylk_frame); +-} +- +-int +-shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) +-{ +- int ret = -1; +- void *size_attr = NULL; +- uint64_t size_array[4]; +- +- ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, +- SHARD_MSG_INTERNAL_XATTR_MISSING, +- "Failed to " +- "get " GF_XATTR_SHARD_FILE_SIZE " for %s", +- uuid_utoa(stbuf->ia_gfid)); +- return ret; +- } ++void shard_local_wipe(shard_local_t *local) { ++ int i = 0; ++ int count = 0; + +- memcpy(size_array, size_attr, sizeof(size_array)); ++ count = local->num_blocks; + +- stbuf->ia_size = ntoh64(size_array[0]); +- stbuf->ia_blocks = ntoh64(size_array[2]); ++ syncbarrier_destroy(&local->barrier); ++ loc_wipe(&local->loc); ++ loc_wipe(&local->dot_shard_loc); ++ loc_wipe(&local->dot_shard_rm_loc); ++ loc_wipe(&local->loc2); ++ loc_wipe(&local->tmp_loc); ++ loc_wipe(&local->int_inodelk.loc); ++ loc_wipe(&local->int_entrylk.loc); ++ loc_wipe(&local->newloc); + +- return 0; +-} ++ if (local->int_entrylk.basename) ++ GF_FREE(local->int_entrylk.basename); ++ if (local->fd) ++ fd_unref(local->fd); + +-int +-shard_call_count_return(call_frame_t *frame) +-{ +- int call_count = 0; +- shard_local_t *local = NULL; ++ if (local->xattr_req) ++ dict_unref(local->xattr_req); ++ if (local->xattr_rsp) ++ dict_unref(local->xattr_rsp); + +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ if (!local->inode_list) ++ break; ++ ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ ++ GF_FREE(local->inode_list); ++ ++ GF_FREE(local->vector); ++ if (local->iobref) ++ iobref_unref(local->iobref); ++ if (local->list_inited) ++ gf_dirent_free(&local->entries_head); ++ if (local->inodelk_frame) ++ SHARD_STACK_DESTROY(local->inodelk_frame); ++ if (local->entrylk_frame) ++ SHARD_STACK_DESTROY(local->entrylk_frame); ++} ++ ++int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { ++ int ret = -1; ++ void *size_attr = NULL; ++ uint64_t size_array[4]; ++ ++ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, ++ SHARD_MSG_INTERNAL_XATTR_MISSING, ++ "Failed to " ++ "get " GF_XATTR_SHARD_FILE_SIZE " for %s", ++ uuid_utoa(stbuf->ia_gfid)); ++ return ret; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ ++ stbuf->ia_size = ntoh64(size_array[0]); ++ stbuf->ia_blocks = 
ntoh64(size_array[2]); ++ ++ return 0; ++} ++ ++int shard_call_count_return(call_frame_t *frame) { ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ LOCK(&frame->lock); ++ { call_count = --local->call_count; } ++ UNLOCK(&frame->lock); ++ ++ return call_count; ++} ++ ++static char *shard_internal_dir_string(shard_internal_dir_type_t type) { ++ char *str = NULL; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ str = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ str = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ return str; ++} ++ ++static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ char *bname = NULL; ++ inode_t *parent = NULL; ++ loc_t *internal_dir_loc = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ if (!local) ++ return -1; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ internal_dir_loc = &local->dot_shard_loc; ++ bname = GF_SHARD_DIR; ++ parent = inode_ref(this->itable->root); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ internal_dir_loc = &local->dot_shard_rm_loc; ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ parent = inode_ref(priv->dot_shard_inode); ++ break; ++ default: ++ break; ++ } ++ ++ internal_dir_loc->inode = inode_new(this->itable); ++ internal_dir_loc->parent = parent; ++ ret = inode_path(internal_dir_loc->parent, bname, ++ (char **)&internal_dir_loc->path); ++ if (ret < 0 || !(internal_dir_loc->inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", bname); ++ goto out; ++ } ++ ++ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); ++ if (internal_dir_loc->name) ++ internal_dir_loc->name++; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, ++ inode_t *base_inode, int block_num, ++ uuid_t gfid) { ++ char block_bname[256] = { ++ 0, ++ }; ++ inode_t *lru_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *lru_inode_ctx = NULL; ++ shard_inode_ctx_t *lru_base_inode_ctx = NULL; ++ inode_t *fsync_inode = NULL; ++ inode_t *lru_base_inode = NULL; ++ gf_boolean_t do_fsync = _gf_false; ++ ++ priv = this->private; ++ ++ shard_inode_ctx_get(linked_inode, this, &ctx); ++ ++ if (list_empty(&ctx->ilist)) { ++ if (priv->inode_count + 1 <= priv->lru_limit) { ++ /* If this inode was linked here for the first time (indicated ++ * by empty list), and if there is still space in the priv list, ++ * add this ctx to the tail of the list. ++ */ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ priv->inode_count++; ++ ctx->base_inode = inode_ref(base_inode); ++ } else { ++ /*If on the other hand there is no available slot for this inode ++ * in the list, delete the lru inode from the head of the list, ++ * unlink it. And in its place add this new inode into the list. 
++ */ ++ lru_inode_ctx = ++ list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); ++ GF_ASSERT(lru_inode_ctx->block_num > 0); ++ lru_base_inode = lru_inode_ctx->base_inode; ++ list_del_init(&lru_inode_ctx->ilist); ++ lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); ++ /* If the lru inode was part of the pending-fsync list, ++ * the base inode needs to be unref'd, the lru inode ++ * deleted from fsync list and fsync'd in a new frame, ++ * and then unlinked in memory and forgotten. ++ */ ++ if (!lru_base_inode) ++ goto after_fsync_check; ++ LOCK(&lru_base_inode->lock); ++ LOCK(&lru_inode->lock); ++ { ++ if (!list_empty(&lru_inode_ctx->to_fsync_list)) { ++ list_del_init(&lru_inode_ctx->to_fsync_list); ++ lru_inode_ctx->fsync_needed = 0; ++ do_fsync = _gf_true; ++ __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); ++ lru_base_inode_ctx->fsync_count--; ++ } ++ } ++ UNLOCK(&lru_inode->lock); ++ UNLOCK(&lru_base_inode->lock); ++ ++ after_fsync_check: ++ if (!do_fsync) { ++ shard_make_block_bname(lru_inode_ctx->block_num, ++ lru_inode_ctx->base_gfid, block_bname, ++ sizeof(block_bname)); ++ /* The following unref corresponds to the ref held at ++ * the time the shard was added to the lru list. ++ */ ++ inode_unref(lru_inode); ++ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); ++ inode_forget(lru_inode, 0); ++ } else { ++ /* The following unref corresponds to the ref ++ * held when the shard was added to fsync list. ++ */ ++ inode_unref(lru_inode); ++ fsync_inode = lru_inode; ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ } ++ /* The following unref corresponds to the ref ++ * held by inode_find() above. ++ */ ++ inode_unref(lru_inode); ++ ++ /* The following unref corresponds to the ref held on the base shard ++ * at the time of adding shard inode to lru list ++ */ ++ if (lru_base_inode) ++ inode_unref(lru_base_inode); ++ ++ /* For as long as an inode is in lru list, we try to ++ * keep it alive by holding a ref on it. ++ */ ++ inode_ref(linked_inode); ++ if (base_inode) ++ gf_uuid_copy(ctx->base_gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(ctx->base_gfid, gfid); ++ ctx->block_num = block_num; ++ ctx->base_inode = inode_ref(base_inode); ++ list_add_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ } else { ++ /* If this is not the first time this inode is being operated on, move ++ * it to the most recently used end of the list. 
++ */ ++ list_move_tail(&ctx->ilist, &priv->ilist_head); ++ } ++ return fsync_inode; ++} ++ ++int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, ++ int32_t op_ret, int32_t op_errno) { ++ switch (fop) { ++ case GF_FOP_LOOKUP: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); ++ break; ++ case GF_FOP_STAT: ++ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSTAT: ++ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_TRUNCATE: ++ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FTRUNCATE: ++ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_MKNOD: ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_LINK: ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_CREATE: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_UNLINK: ++ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_RENAME: ++ SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ break; ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_READ: ++ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, ++ NULL); ++ break; ++ case GF_FOP_FSYNC: ++ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_REMOVEXATTR: ++ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FREMOVEXATTR: ++ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_FGETXATTR: ++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_GETXATTR: ++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); ++ break; ++ case GF_FOP_FSETXATTR: ++ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETXATTR: ++ SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); ++ break; ++ case GF_FOP_SETATTR: ++ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_FSETATTR: ++ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); ++ break; ++ case GF_FOP_SEEK: ++ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, ++ call_frame_t *frame, ++ int32_t op_ret) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (fop) { ++ case GF_FOP_WRITE: ++ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_FALLOCATE: ++ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, 
local->xattr_rsp); ++ break; ++ case GF_FOP_ZEROFILL: ++ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ case GF_FOP_DISCARD: ++ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, ++ &local->postbuf, local->xattr_rsp); ++ break; ++ default: ++ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "Invalid fop id = %d", fop); ++ break; ++ } ++ return 0; ++} ++ ++int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ char block_bname[256] = { ++ 0, ++ }; ++ fd_t *anon_fd = cookie; ++ inode_t *shard_inode = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ ++ if (anon_fd == NULL || op_ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, ++ "fsync failed on shard"); ++ goto out; ++ } ++ shard_inode = anon_fd->inode; ++ ++ LOCK(&priv->lock); ++ LOCK(&shard_inode->lock); ++ { ++ __shard_inode_ctx_get(shard_inode, this, &ctx); ++ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { ++ shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, ++ sizeof(block_bname)); ++ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); ++ /* The following unref corresponds to the ref held by ++ * inode_link() at the time the shard was created or ++ * looked up ++ */ ++ inode_unref(shard_inode); ++ inode_forget(shard_inode, 0); ++ } ++ } ++ UNLOCK(&shard_inode->lock); ++ UNLOCK(&priv->lock); + +- LOCK(&frame->lock); +- { +- call_count = --local->call_count; ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ STACK_DESTROY(frame->root); ++ return 0; ++} ++ ++int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { ++ fd_t *anon_fd = NULL; ++ call_frame_t *fsync_frame = NULL; ++ ++ fsync_frame = create_frame(this, this->ctx->pool); ++ if (!fsync_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to fsync shard"); ++ return -1; ++ } ++ ++ anon_fd = fd_anonymous(inode); ++ if (!anon_fd) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create anon fd to" ++ " fsync shard"); ++ STACK_DESTROY(fsync_frame->root); ++ return -1; ++ } ++ ++ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, ++ 1, NULL); ++ return 0; ++} ++ ++int shard_common_resolve_shards( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler) { ++ int i = -1; ++ uint32_t shard_idx_iter = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *res_inode = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ local->call_count = 0; ++ shard_idx_iter = local->first_block; ++ res_inode = local->resolver_base_inode; ++ if (res_inode) ++ gf_uuid_copy(gfid, res_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if ((local->op_ret < 0) || (local->resolve_not)) ++ goto out; ++ ++ while (shard_idx_iter <= local->last_block) { ++ i++; ++ if (shard_idx_iter == 0) { ++ local->inode_list[i] = inode_ref(res_inode); ++ shard_idx_iter++; ++ continue; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, 
sizeof(path)); ++ ++ inode = NULL; ++ inode = inode_resolve(this->itable, path); ++ if (inode) { ++ gf_msg_debug(this->name, 0, "Shard %d already " ++ "present. gfid=%s. Saving inode for future.", ++ shard_idx_iter, uuid_utoa(inode->gfid)); ++ local->inode_list[i] = inode; ++ /* Let the ref on the inodes that are already present ++ * in inode table still be held so that they don't get ++ * forgotten by the time the fop reaches the actual ++ * write stage. ++ */ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, ++ shard_idx_iter, gfid); ++ } ++ UNLOCK(&priv->lock); ++ shard_idx_iter++; ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++ continue; ++ } else { ++ local->call_count++; ++ shard_idx_iter++; + } +- UNLOCK(&frame->lock); ++ } ++out: ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ dict_t *dict, dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if ((local->fd) && (local->fd->inode)) ++ inode = local->fd->inode; ++ else if (local->loc.inode) ++ inode = local->loc.inode; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" ++ " xattr failed on %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- return call_count; ++ if (shard_modify_size_and_block_count(&local->postbuf, dict)) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++err: ++ local->post_update_size_handler(frame, this); ++ return 0; + } + +-static char * +-shard_internal_dir_string(shard_internal_dir_type_t type) +-{ +- char *str = NULL; ++int shard_set_size_attrs(int64_t size, int64_t block_count, ++ int64_t **size_attr_p) { ++ int ret = -1; ++ int64_t *size_attr = NULL; + +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- str = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- str = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- return str; +-} +- +-static int +-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- char *bname = NULL; +- inode_t *parent = NULL; +- loc_t *internal_dir_loc = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- if (!local) +- return -1; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- internal_dir_loc = &local->dot_shard_loc; +- bname = GF_SHARD_DIR; +- parent = inode_ref(this->itable->root); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- internal_dir_loc = &local->dot_shard_rm_loc; +- bname = GF_SHARD_REMOVE_ME_DIR; +- parent = inode_ref(priv->dot_shard_inode); +- break; +- default: +- break; +- } ++ if (!size_attr_p) ++ goto out; + +- internal_dir_loc->inode = inode_new(this->itable); +- internal_dir_loc->parent = parent; +- ret = inode_path(internal_dir_loc->parent, bname, +- (char **)&internal_dir_loc->path); +- if (ret < 0 || !(internal_dir_loc->inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", bname); +- goto out; +- } ++ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); ++ if (!size_attr) ++ goto out; + +- internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); +- if (internal_dir_loc->name) +- internal_dir_loc->name++; ++ 
size_attr[0] = hton64(size); ++ /* As sharding evolves, it _may_ be necessary to embed more pieces of ++ * information within the same xattr. So allocating slots for them in ++ * advance. For now, only bytes 0-63 and 128-191 which would make up the ++ * current size and block count respectively of the file are valid. ++ */ ++ size_attr[2] = hton64(block_count); + +- ret = 0; ++ *size_attr_p = size_attr; ++ ++ ret = 0; + out: +- return ret; ++ return ret; + } + +-inode_t * +-__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, +- inode_t *base_inode, int block_num, +- uuid_t gfid) +-{ +- char block_bname[256] = { +- 0, +- }; +- inode_t *lru_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *lru_inode_ctx = NULL; +- shard_inode_ctx_t *lru_base_inode_ctx = NULL; +- inode_t *fsync_inode = NULL; +- inode_t *lru_base_inode = NULL; +- gf_boolean_t do_fsync = _gf_false; +- +- priv = this->private; +- +- shard_inode_ctx_get(linked_inode, this, &ctx); +- +- if (list_empty(&ctx->ilist)) { +- if (priv->inode_count + 1 <= priv->lru_limit) { +- /* If this inode was linked here for the first time (indicated +- * by empty list), and if there is still space in the priv list, +- * add this ctx to the tail of the list. +- */ +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- priv->inode_count++; +- ctx->base_inode = inode_ref(base_inode); +- } else { +- /*If on the other hand there is no available slot for this inode +- * in the list, delete the lru inode from the head of the list, +- * unlink it. And in its place add this new inode into the list. +- */ +- lru_inode_ctx = list_first_entry(&priv->ilist_head, +- shard_inode_ctx_t, ilist); +- GF_ASSERT(lru_inode_ctx->block_num > 0); +- lru_base_inode = lru_inode_ctx->base_inode; +- list_del_init(&lru_inode_ctx->ilist); +- lru_inode = inode_find(linked_inode->table, +- lru_inode_ctx->stat.ia_gfid); +- /* If the lru inode was part of the pending-fsync list, +- * the base inode needs to be unref'd, the lru inode +- * deleted from fsync list and fsync'd in a new frame, +- * and then unlinked in memory and forgotten. +- */ +- if (!lru_base_inode) +- goto after_fsync_check; +- LOCK(&lru_base_inode->lock); +- LOCK(&lru_inode->lock); +- { +- if (!list_empty(&lru_inode_ctx->to_fsync_list)) { +- list_del_init(&lru_inode_ctx->to_fsync_list); +- lru_inode_ctx->fsync_needed = 0; +- do_fsync = _gf_true; +- __shard_inode_ctx_get(lru_base_inode, this, +- &lru_base_inode_ctx); +- lru_base_inode_ctx->fsync_count--; +- } +- } +- UNLOCK(&lru_inode->lock); +- UNLOCK(&lru_base_inode->lock); +- +- after_fsync_check: +- if (!do_fsync) { +- shard_make_block_bname(lru_inode_ctx->block_num, +- lru_inode_ctx->base_gfid, block_bname, +- sizeof(block_bname)); +- /* The following unref corresponds to the ref held at +- * the time the shard was added to the lru list. +- */ +- inode_unref(lru_inode); +- inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); +- inode_forget(lru_inode, 0); +- } else { +- /* The following unref corresponds to the ref +- * held when the shard was added to fsync list. 
+- */ +- inode_unref(lru_inode); +- fsync_inode = lru_inode; +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- } +- /* The following unref corresponds to the ref +- * held by inode_find() above. +- */ +- inode_unref(lru_inode); +- +- /* The following unref corresponds to the ref held on the base shard +- * at the time of adding shard inode to lru list +- */ +- if (lru_base_inode) +- inode_unref(lru_base_inode); +- +- /* For as long as an inode is in lru list, we try to +- * keep it alive by holding a ref on it. +- */ +- inode_ref(linked_inode); +- if (base_inode) +- gf_uuid_copy(ctx->base_gfid, base_inode->gfid); +- else +- gf_uuid_copy(ctx->base_gfid, gfid); +- ctx->block_num = block_num; +- ctx->base_inode = inode_ref(base_inode); +- list_add_tail(&ctx->ilist, &priv->ilist_head); +- } +- } else { +- /* If this is not the first time this inode is being operated on, move +- * it to the most recently used end of the list. +- */ +- list_move_tail(&ctx->ilist, &priv->ilist_head); +- } +- return fsync_inode; +-} ++int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ loc_t *loc, ++ shard_post_update_size_fop_handler_t handler) { ++ int ret = -1; ++ int64_t *size_attr = NULL; ++ int64_t delta_blocks = 0; ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; + +-int +-shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, +- int32_t op_ret, int32_t op_errno) +-{ +- switch (fop) { +- case GF_FOP_LOOKUP: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_STAT: +- SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSTAT: +- SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_TRUNCATE: +- SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FTRUNCATE: +- SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_MKNOD: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_LINK: +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, +- NULL, NULL); +- break; +- case GF_FOP_CREATE: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_UNLINK: +- SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_RENAME: +- SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, +- NULL, NULL, NULL, NULL); +- break; +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_READ: +- SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, +- NULL, NULL); +- break; +- case GF_FOP_FSYNC: +- SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_REMOVEXATTR: +- SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FREMOVEXATTR: +- SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_FGETXATTR: +- 
SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_GETXATTR: +- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); +- break; +- case GF_FOP_FSETXATTR: +- SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETXATTR: +- SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); +- break; +- case GF_FOP_SETATTR: +- SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_FSETATTR: +- SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, +- NULL); +- break; +- case GF_FOP_SEEK: +- SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ local = frame->local; ++ local->post_update_size_handler = handler; + +-int +-shard_common_inode_write_success_unwind(glusterfs_fop_t fop, +- call_frame_t *frame, int32_t op_ret) +-{ +- shard_local_t *local = NULL; ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ if (fd) ++ inode = fd->inode; ++ else ++ inode = loc->inode; ++ ++ /* If both size and block count have not changed, then skip the xattrop. ++ */ ++ delta_blocks = GF_ATOMIC_GET(local->delta_blocks); ++ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { ++ goto out; ++ } ++ ++ ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, ++ &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, ++ "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict. 
gfid=%s", GF_XATTR_SHARD_FILE_SIZE, ++ uuid_utoa(inode->gfid)); ++ GF_FREE(size_attr); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } + +- local = frame->local; ++ if (fd) ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); ++ else ++ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, ++ xattr_req, NULL); + +- switch (fop) { +- case GF_FOP_WRITE: +- SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_FALLOCATE: +- SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_ZEROFILL: +- SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- case GF_FOP_DISCARD: +- SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, +- &local->postbuf, local->xattr_rsp); +- break; +- default: +- gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "Invalid fop id = %d", fop); +- break; +- } +- return 0; +-} ++ dict_unref(xattr_req); ++ return 0; + +-int +-shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- char block_bname[256] = { +- 0, +- }; +- fd_t *anon_fd = cookie; +- inode_t *shard_inode = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++out: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; ++} ++ ++static inode_t *shard_link_internal_dir_inode(shard_local_t *local, ++ inode_t *inode, struct iatt *buf, ++ shard_internal_dir_type_t type) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ char *bname = NULL; ++ inode_t **priv_inode = NULL; ++ inode_t *parent = NULL; ++ ++ priv = THIS->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ bname = GF_SHARD_DIR; ++ priv_inode = &priv->dot_shard_inode; ++ parent = inode->table->root; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ priv_inode = &priv->dot_shard_rm_inode; ++ parent = priv->dot_shard_inode; ++ break; ++ default: ++ break; ++ } ++ ++ linked_inode = inode_link(inode, parent, bname, buf); ++ inode_lookup(linked_inode); ++ *priv_inode = linked_inode; ++ return linked_inode; ++} ++ ++int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ shard_local_t *local = NULL; ++ inode_t *linked_inode = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++ ++ local = frame->local; ++ ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ /* To-Do: Fix refcount increment per call to ++ * shard_link_internal_dir_inode(). 
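Note that shard_update_file_size() winds an (f)xattrop with GF_XATTROP_ADD_ARRAY64 rather than a plain setxattr: the brick folds the signed deltas into the stored array under its own lock, so concurrent writers never lose each other's size updates. A small model of the server-side addition, under the assumption that each slot is an independent big-endian signed 64-bit counter:

#include <stdint.h>
#include <endian.h>

/* Model of GF_XATTROP_ADD_ARRAY64 applied to the 4-slot shard size
 * xattr: each request carries deltas and the brick applies them
 * atomically (the caller is assumed to hold the inode lock here). */
static void xattrop_add_array64(uint64_t stored[4], const uint64_t delta[4])
{
    for (int i = 0; i < 4; i++) {
        int64_t cur = (int64_t)be64toh(stored[i]);
        int64_t d = (int64_t)be64toh(delta[i]);
        stored[i] = htobe64((uint64_t)(cur + d));
    }
}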
++ */ ++ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ shard_inode_ctx_mark_dir_refreshed(linked_inode, this); ++out: ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; ++} ++ ++int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_internal_dir_type_t type) { ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ break; ++ default: ++ break; ++ } ++ ++ inode = inode_find(this->itable, gfid); ++ ++ if (!shard_inode_ctx_needs_lookup(inode, this)) { ++ local->op_ret = 0; ++ goto out; ++ } + +- priv = this->private; ++ /* Plain assignment because the ref is already taken above through ++ * call to inode_find() ++ */ ++ loc.inode = inode; ++ gf_uuid_copy(loc.gfid, gfid); + +- if (anon_fd == NULL || op_ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, +- "fsync failed on shard"); +- goto out; +- } +- shard_inode = anon_fd->inode; ++ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, ++ NULL); ++ loc_wipe(&loc); + +- LOCK(&priv->lock); +- LOCK(&shard_inode->lock); +- { +- __shard_inode_ctx_get(shard_inode, this, &ctx); +- if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { +- shard_make_block_bname(ctx->block_num, shard_inode->gfid, +- block_bname, sizeof(block_bname)); +- inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); +- /* The following unref corresponds to the ref held by +- * inode_link() at the time the shard was created or +- * looked up +- */ +- inode_unref(shard_inode); +- inode_forget(shard_inode, 0); +- } +- } +- UNLOCK(&shard_inode->lock); +- UNLOCK(&priv->lock); ++ return 0; + + out: +- if (anon_fd) +- fd_unref(anon_fd); +- STACK_DESTROY(frame->root); +- return 0; ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ return 0; + } + +-int +-shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) +-{ +- fd_t *anon_fd = NULL; +- call_frame_t *fsync_frame = NULL; ++int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *link_inode = NULL; ++ shard_local_t *local = NULL; ++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + +- fsync_frame = create_frame(this, this->ctx->pool); +- if (!fsync_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to fsync shard"); +- return -1; +- } ++ local = frame->local; + +- anon_fd = fd_anonymous(inode); +- if (!anon_fd) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create anon fd to" +- " fsync shard"); +- STACK_DESTROY(fsync_frame->root); +- return -1; +- } ++ if (op_ret) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ if (!IA_ISDIR(buf->ia_type)) { ++ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, ++ "%s already exists and " ++ "is not a directory. 
Please remove it from all bricks " ++ "and try again", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = EIO; ++ goto unwind; ++ } ++ ++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type); ++ if (link_inode != inode) { ++ shard_refresh_internal_dir(frame, this, type); ++ } else { ++ shard_inode_ctx_mark_dir_refreshed(link_inode, this); ++ shard_common_resolve_shards(frame, this, local->post_res_handler); ++ } ++ return 0; + +- STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +- anon_fd, 1, NULL); +- return 0; +-} ++unwind: ++ local->post_res_handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t post_res_handler, ++ shard_internal_dir_type_t type) { ++ int ret = -1; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ uuid_t *gfid = NULL; ++ loc_t *loc = NULL; ++ gf_boolean_t free_gfid = _gf_true; ++ ++ local = frame->local; ++ priv = this->private; ++ local->post_res_handler = post_res_handler; ++ ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); ++ if (!gfid) ++ goto err; ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ switch (type) { ++ case SHARD_INTERNAL_DIR_DOT_SHARD: ++ gf_uuid_copy(*gfid, priv->dot_shard_gfid); ++ loc = &local->dot_shard_loc; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); ++ loc = &local->dot_shard_rm_loc; ++ break; ++ default: ++ bzero(*gfid, sizeof(uuid_t)); ++ break; ++ } ++ ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set gfid of %s into dict", ++ shard_internal_dir_string(type)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } else { ++ free_gfid = _gf_false; ++ } + +-int +-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler) +-{ +- int i = -1; +- uint32_t shard_idx_iter = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *res_inode = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- local->call_count = 0; +- shard_idx_iter = local->first_block; +- res_inode = local->resolver_base_inode; +- if (res_inode) +- gf_uuid_copy(gfid, res_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, ++ xattr_req); + +- if ((local->op_ret < 0) || (local->resolve_not)) +- goto out; ++ dict_unref(xattr_req); ++ return 0; + +- while (shard_idx_iter <= local->last_block) { +- i++; +- if (shard_idx_iter == 0) { +- local->inode_list[i] = inode_ref(res_inode); +- shard_idx_iter++; +- continue; +- } ++err: ++ if (xattr_req) ++ dict_unref(xattr_req); ++ if (free_gfid) ++ GF_FREE(gfid); ++ post_res_handler(frame, this); ++ return 0; ++} ++ ++static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, ++ dict_t *xdata, struct iatt *buf) { ++ int ret = 0; ++ uint64_t size = 0; ++ void *bsize = NULL; ++ ++ if (shard_inode_ctx_get_block_size(inode, this, &size)) { ++ /* Fresh lookup 
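shard_lookup_internal_dir() pins the internal directories to fixed, volume-wide gfids by sending "gfid-req" in the lookup dict, so on a fresh volume /.shard is created with that exact identity on every brick. A sketch of the request construction, using only the calls visible above:

/* Build the lookup request for an internal dir; the fixed gfid travels
 * in "gfid-req". */
static dict_t *build_internal_dir_req(shard_priv_t *priv)
{
    uuid_t *gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
    dict_t *xattr_req = NULL;

    if (!gfid)
        return NULL;
    xattr_req = dict_new();
    if (!xattr_req) {
        GF_FREE(gfid);
        return NULL;
    }
    gf_uuid_copy(*gfid, priv->dot_shard_gfid);
    /* false: on success the dict owns the heap-allocated gfid, which is
     * what the free_gfid bookkeeping above implies. */
    if (dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false)) {
        GF_FREE(gfid);
        dict_unref(xattr_req);
        return NULL;
    }
    return xattr_req;
}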
*/ ++ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (!ret) ++ size = ntoh64(*((uint64_t *)bsize)); ++ /* If the file is sharded, set its block size, otherwise just ++ * set 0. ++ */ + +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- inode = NULL; +- inode = inode_resolve(this->itable, path); +- if (inode) { +- gf_msg_debug(this->name, 0, +- "Shard %d already " +- "present. gfid=%s. Saving inode for future.", +- shard_idx_iter, uuid_utoa(inode->gfid)); +- local->inode_list[i] = inode; +- /* Let the ref on the inodes that are already present +- * in inode table still be held so that they don't get +- * forgotten by the time the fop reaches the actual +- * write stage. +- */ +- LOCK(&priv->lock); +- { +- fsync_inode = __shard_update_shards_inode_list( +- inode, this, res_inode, shard_idx_iter, gfid); +- } +- UNLOCK(&priv->lock); +- shard_idx_iter++; +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +- continue; +- } else { +- local->call_count++; +- shard_idx_iter++; +- } +- } +-out: +- post_res_handler(frame, this); +- return 0; ++ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); ++ } ++ /* If the file is sharded, also set the remaining attributes, ++ * except for ia_size and ia_blocks. ++ */ ++ if (size) { ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ (void)shard_inode_ctx_invalidate(inode, this, buf); ++ } ++} ++ ++int shard_delete_shards(void *opaque); ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ ++int shard_start_background_deletion(xlator_t *this) { ++ int ret = 0; ++ gf_boolean_t i_cleanup = _gf_true; ++ shard_priv_t *priv = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ ++ priv = this->private; ++ ++ LOCK(&priv->lock); ++ { ++ switch (priv->bg_del_state) { ++ case SHARD_BG_DELETION_NONE: ++ i_cleanup = _gf_true; ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ break; ++ case SHARD_BG_DELETION_LAUNCHING: ++ i_cleanup = _gf_false; ++ break; ++ case SHARD_BG_DELETION_IN_PROGRESS: ++ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; ++ i_cleanup = _gf_false; ++ break; ++ default: ++ break; ++ } ++ } ++ UNLOCK(&priv->lock); ++ if (!i_cleanup) ++ return 0; ++ ++ cleanup_frame = create_frame(this, this->ctx->pool); ++ if (!cleanup_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create " ++ "new frame to delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ ++ ret = synctask_new(this->ctx->env, shard_delete_shards, ++ shard_delete_shards_cbk, cleanup_frame, cleanup_frame); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "failed to create task to do background " ++ "cleanup of shards"); ++ STACK_DESTROY(cleanup_frame->root); ++ goto err; ++ } ++ return 0; ++ ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ return ret; + } + +-int +-shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, struct iatt *postparent) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t 
i_start_cleanup = _gf_false; + +- local = frame->local; ++ priv = this->private; + +- if ((local->fd) && (local->fd->inode)) +- inode = local->fd->inode; +- else if (local->loc.inode) +- inode = local->loc.inode; ++ if (op_ret < 0) ++ goto unwind; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_UPDATE_FILE_SIZE_FAILED, +- "Update to file size" +- " xattr failed on %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (IA_ISDIR(buf->ia_type)) ++ goto unwind; + +- if (shard_modify_size_and_block_count(&local->postbuf, dict)) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +-err: +- local->post_update_size_handler(frame, this); +- return 0; +-} ++ /* Also, if the file is sharded, get the file size and block cnt xattr, ++ * and store them in the stbuf appropriately. ++ */ + +-int +-shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; ++ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && ++ frame->root->pid != GF_CLIENT_PID_GSYNCD) ++ shard_modify_size_and_block_count(buf, xdata); + +- if (!size_attr_p) +- goto out; ++ /* If this was a fresh lookup, there are two possibilities: ++ * 1) If the file is sharded (indicated by the presence of block size ++ * xattr), store this block size, along with rdev and mode in its ++ * inode ctx. ++ * 2) If the file is not sharded, store size along with rdev and mode ++ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is ++ * already initialised to all zeroes, nothing more needs to be done. ++ */ + +- size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); +- if (!size_attr) +- goto out; ++ (void)shard_inode_ctx_update(inode, this, xdata, buf); + +- size_attr[0] = hton64(size); +- /* As sharding evolves, it _may_ be necessary to embed more pieces of +- * information within the same xattr. So allocating slots for them in +- * advance. For now, only bytes 0-63 and 128-191 which would make up the +- * current size and block count respectively of the file are valid. 
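shard_start_background_deletion() serializes cleanup through a small state machine under priv->lock: only the NONE -> LAUNCHING transition actually spawns the synctask, while a request that arrives during IN_PROGRESS merely flips the state back to LAUNCHING so the running task knows to do another pass. A standalone model of that guard, assuming gluster's LOCK/UNLOCK behave like a plain mutex here:

#include <pthread.h>
#include <stdbool.h>

typedef enum { BG_NONE, BG_LAUNCHING, BG_IN_PROGRESS } bg_state_t;

static pthread_mutex_t bg_lock = PTHREAD_MUTEX_INITIALIZER;
static bg_state_t bg_state = BG_NONE;

/* Returns true iff this caller should actually spawn the cleanup task.
 * A caller that finds cleanup already running only re-arms it. */
static bool should_launch_cleanup(void)
{
    bool launch = false;

    pthread_mutex_lock(&bg_lock);
    switch (bg_state) {
    case BG_NONE:
        bg_state = BG_LAUNCHING;
        launch = true;
        break;
    case BG_LAUNCHING: /* someone else is about to run it */
        break;
    case BG_IN_PROGRESS: /* ask the running task for one more pass */
        bg_state = BG_LAUNCHING;
        break;
    }
    pthread_mutex_unlock(&bg_lock);
    return launch;
}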
+- */ +- size_attr[2] = hton64(block_count); ++ LOCK(&priv->lock); ++ { ++ if (priv->first_lookup_done == _gf_false) { ++ priv->first_lookup_done = _gf_true; ++ i_start_cleanup = _gf_true; ++ } ++ } ++ UNLOCK(&priv->lock); + +- *size_attr_p = size_attr; ++ if (!i_start_cleanup) ++ goto unwind; + +- ret = 0; +-out: +- return ret; ++ ret = shard_start_background_deletion(this); ++ if (ret < 0) { ++ LOCK(&priv->lock); ++ { priv->first_lookup_done = _gf_false; } ++ UNLOCK(&priv->lock); ++ } ++ ++unwind: ++ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, ++ postparent); ++ return 0; + } + +-int +-shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, +- loc_t *loc, shard_post_update_size_fop_handler_t handler) +-{ +- int ret = -1; +- int64_t *size_attr = NULL; +- int64_t delta_blocks = 0; +- inode_t *inode = NULL; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; ++int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *xattr_req) { ++ int ret = -1; ++ int32_t op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = frame->local; +- local->post_update_size_handler = handler; ++ this->itable = loc->inode->table; ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); ++ } + +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; +- } ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (fd) +- inode = fd->inode; +- else +- inode = loc->inode; ++ frame->local = local; + +- /* If both size and block count have not changed, then skip the xattrop. +- */ +- delta_blocks = GF_ATOMIC_GET(local->delta_blocks); +- if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { +- goto out; +- } ++ loc_copy(&local->loc, loc); + +- ret = shard_set_size_attrs(local->delta_size + local->hole_size, +- delta_blocks, &size_attr); ++ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, +- "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict" ++ " value: key:%s for path %s", ++ GF_XATTR_SHARD_BLOCK_SIZE, loc->path); ++ goto err; + } ++ } + +- ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict. 
gfid=%s", +- GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); +- GF_FREE(size_attr); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s for path %s.", ++ GF_XATTR_SHARD_FILE_SIZE, loc->path); ++ goto err; + } ++ } + +- if (fd) +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fxattrop, fd, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- else +- STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->xattrop, loc, +- GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); +- +- dict_unref(xattr_req); +- return 0; +- +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; +-} +- +-static inode_t * +-shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, +- struct iatt *buf, shard_internal_dir_type_t type) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- char *bname = NULL; +- inode_t **priv_inode = NULL; +- inode_t *parent = NULL; +- +- priv = THIS->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- bname = GF_SHARD_DIR; +- priv_inode = &priv->dot_shard_inode; +- parent = inode->table->root; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- bname = GF_SHARD_REMOVE_ME_DIR; +- priv_inode = &priv->dot_shard_rm_inode; +- parent = priv->dot_shard_inode; +- break; +- default: +- break; +- } ++ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) ++ dict_del(xattr_req, GF_CONTENT_KEY); + +- linked_inode = inode_link(inode, parent, bname, buf); +- inode_lookup(linked_inode); +- *priv_inode = linked_inode; +- return linked_inode; ++ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, ++int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- shard_local_t *local = NULL; +- inode_t *linked_inode = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +- +- local = frame->local; +- +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } ++ struct iatt *postparent) { ++ int ret = -1; ++ int32_t mask = SHARD_INODE_WRITE_MASK; ++ shard_local_t *local = NULL; ++ shard_inode_ctx_t ctx = { ++ 0, ++ }; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" ++ " failed : %s", ++ loc_gfid_utoa(&(local->loc))); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } ++ ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ ++ if (shard_inode_ctx_get_all(inode, this, &ctx)) ++ mask = SHARD_ALL_MASK; ++ ++ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, ++ (mask | SHARD_MASK_REFRESH_RESET)); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, ++ "Failed to set inode" ++ " write params into inode ctx for %s", ++ uuid_utoa(buf->ia_gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto unwind; ++ } 
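Every lookup from a regular client thus asks the lower layers for both shard xattrs up front: the block-size key (to learn whether the file is sharded at all) and the 32-byte size array (so the callback can fix up ia_size). A compact restatement of that request setup, using only dict calls that appear in the patch:

/* The uint64 values passed here are apparently size hints for the brick,
 * not data: 0 for the block-size key, 8 * 4 bytes for the size array. */
static int shard_prepare_lookup_req(dict_t *xattr_req)
{
    if (dict_set_uint64(xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0))
        return -1;
    if (dict_set_uint64(xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4))
        return -1;
    /* Presumably dropped because an inline-content reply could only ever
     * carry the base shard of a sharded file; the patch itself only
     * shows the dict_del. */
    dict_del(xattr_req, GF_CONTENT_KEY);
    return 0;
}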
++ ++unwind: ++ local->handler(frame, this); ++ return 0; ++} ++ ++int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ shard_post_fop_handler_t handler) { ++ int ret = -1; ++ shard_local_t *local = NULL; ++ dict_t *xattr_req = NULL; ++ gf_boolean_t need_refresh = _gf_false; ++ ++ local = frame->local; ++ local->handler = handler; ++ ++ ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, ++ &need_refresh); ++ /* By this time, inode ctx should have been created either in create, ++ * mknod, readdirp or lookup. If not it is a bug! ++ */ ++ if ((ret == 0) && (need_refresh == _gf_false)) { ++ gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" ++ "Serving prebuf off the inode ctx cache", ++ uuid_utoa(loc->gfid)); ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto out; ++ } ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ ++ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ ++ dict_unref(xattr_req); ++ return 0; + +- /* To-Do: Fix refcount increment per call to +- * shard_link_internal_dir_inode(). +- */ +- linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- shard_inode_ctx_mark_dir_refreshed(linked_inode, this); + out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ handler(frame, this); ++ return 0; + } + +-int +-shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_internal_dir_type_t type) +-{ +- loc_t loc = { +- 0, +- }; +- inode_t *inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; ++int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- break; +- default: +- break; +- } ++ local = frame->local; + +- inode = inode_find(this->itable, gfid); ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +- if (!shard_inode_ctx_needs_lookup(inode, this)) { +- local->op_ret = 0; +- goto out; +- } ++ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; ++} + +- /* Plain assignment because the ref is already taken above through +- * call to inode_find() +- */ +- loc.inode = inode; +- gf_uuid_copy(loc.gfid, gfid); ++int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, +- NULL); +- loc_wipe(&loc); ++ local = frame->local; + +- return 0; ++ if (local->op_ret >= 0) ++ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, ++ SHARD_LOOKUP_MASK); + +-out: +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- return 0; ++ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, ++ &local->prebuf, local->xattr_rsp); ++ return 0; + } + +-int +-shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t 
op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *link_inode = NULL; +- shard_local_t *local = NULL; +- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; ++int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ dict_t *xdata) { ++ inode_t *inode = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (op_ret) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, ++ "stat failed: %s", local->fd ? uuid_utoa(local->fd->inode->gfid) ++ : uuid_utoa((local->loc.inode)->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto unwind; ++ } + +- if (!IA_ISDIR(buf->ia_type)) { +- gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, +- "%s already exists and " +- "is not a directory. Please remove it from all bricks " +- "and try again", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = EIO; +- goto unwind; +- } ++ local->prebuf = *buf; ++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { ++ local->op_ret = -1; ++ local->op_errno = EINVAL; ++ goto unwind; ++ } ++ local->xattr_rsp = dict_ref(xdata); + +- link_inode = shard_link_internal_dir_inode(local, inode, buf, type); +- if (link_inode != inode) { +- shard_refresh_internal_dir(frame, this, type); +- } else { +- shard_inode_ctx_mark_dir_refreshed(link_inode, this); +- shard_common_resolve_shards(frame, this, local->post_res_handler); +- } +- return 0; ++ if (local->loc.inode) ++ inode = local->loc.inode; ++ else ++ inode = local->fd->inode; ++ ++ shard_inode_ctx_invalidate(inode, this, &local->prebuf); + + unwind: +- local->post_res_handler(frame, this); +- return 0; ++ local->handler(frame, this); ++ return 0; + } + +-int +-shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t post_res_handler, +- shard_internal_dir_type_t type) +-{ +- int ret = -1; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- uuid_t *gfid = NULL; +- loc_t *loc = NULL; +- gf_boolean_t free_gfid = _gf_true; +- +- local = frame->local; +- priv = this->private; +- local->post_res_handler = post_res_handler; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); +- if (!gfid) +- goto err; +- +- xattr_req = dict_new(); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- gf_uuid_copy(*gfid, priv->dot_shard_gfid); +- loc = &local->dot_shard_loc; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); +- loc = &local->dot_shard_rm_loc; +- break; +- default: +- bzero(*gfid, sizeof(uuid_t)); +- break; +- } ++int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set gfid of %s into dict", +- shard_internal_dir_string(type)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } else { +- free_gfid = _gf_false; +- } ++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) 
{ ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); ++ return 0; ++ } + +- STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, +- xattr_req); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- dict_unref(xattr_req); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + ++ frame->local = local; ++ ++ local->handler = shard_post_stat_handler; ++ loc_copy(&local->loc, loc); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, ++ local, err); ++ ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); ++ return 0; + err: +- if (xattr_req) +- dict_unref(xattr_req); +- if (free_gfid) +- GF_FREE(gfid); +- post_res_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); ++ return 0; + } + +-static void +-shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, +- struct iatt *buf) +-{ +- int ret = 0; +- uint64_t size = 0; +- void *bsize = NULL; +- +- if (shard_inode_ctx_get_block_size(inode, this, &size)) { +- /* Fresh lookup */ +- ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (!ret) +- size = ntoh64(*((uint64_t *)bsize)); +- /* If the file is sharded, set its block size, otherwise just +- * set 0. +- */ ++int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); +- } +- /* If the file is sharded, also set the remaining attributes, +- * except for ia_size and ia_blocks. 
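Both stat paths above apply the same gate: directories, symlinks, unsharded files (block size 0 in the inode ctx) and gsyncd's requests go straight through to the child, and only sharded regular files take the detour that re-reads the size xattr and patches ia_size/ia_blocks. A compact model of that predicate, assuming the block size has already been cached in the inode ctx:

#include <stdbool.h>
#include <stdint.h>

/* Model of the pass-through test in shard_stat()/shard_fstat(): only
 * regular, sharded files from normal clients need interposition. */
static bool shard_must_intercept_stat(bool is_dir, bool is_symlink,
                                      uint64_t block_size,
                                      bool client_is_gsyncd)
{
    if (is_dir || is_symlink)
        return false; /* never sharded */
    if (block_size == 0)
        return false; /* not sharded (or predates sharding) */
    if (client_is_gsyncd)
        return false; /* geo-rep wants the raw on-disk view */
    return true;
}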
+- */ +- if (size) { +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- (void)shard_inode_ctx_invalidate(inode, this, buf); +- } +-} ++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_delete_shards(void *opaque); ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } + +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, xdata); ++ return 0; ++ } + +-int +-shard_start_background_deletion(xlator_t *this) +-{ +- int ret = 0; +- gf_boolean_t i_cleanup = _gf_true; +- shard_priv_t *priv = NULL; +- call_frame_t *cleanup_frame = NULL; ++ if (!this->itable) ++ this->itable = fd->inode->table; + +- priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- LOCK(&priv->lock); +- { +- switch (priv->bg_del_state) { +- case SHARD_BG_DELETION_NONE: +- i_cleanup = _gf_true; +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- break; +- case SHARD_BG_DELETION_LAUNCHING: +- i_cleanup = _gf_false; +- break; +- case SHARD_BG_DELETION_IN_PROGRESS: +- priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; +- i_cleanup = _gf_false; +- break; +- default: +- break; +- } +- } +- UNLOCK(&priv->lock); +- if (!i_cleanup) +- return 0; +- +- cleanup_frame = create_frame(this, this->ctx->pool); +- if (!cleanup_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create " +- "new frame to delete shards"); +- ret = -ENOMEM; +- goto err; +- } ++ frame->local = local; + +- set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); ++ local->handler = shard_post_fstat_handler; ++ local->fd = fd_ref(fd); ++ local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; + +- ret = synctask_new(this->ctx->env, shard_delete_shards, +- shard_delete_shards_cbk, cleanup_frame, cleanup_frame); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_WARNING, errno, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "failed to create task to do background " +- "cleanup of shards"); +- STACK_DESTROY(cleanup_frame->root); +- goto err; +- } +- return 0; ++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, ++ local, err); + ++ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); ++ return 0; + err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- return ret; ++ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, struct iatt *postparent) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; +- gf_boolean_t i_start_cleanup = _gf_false; ++int shard_post_update_size_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- priv = this->private; ++ local = frame->local; + +- if (op_ret < 0) +- goto unwind; ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->postbuf, NULL); ++ return 0; ++} + +- if (IA_ISDIR(buf->ia_type)) +- goto unwind; ++int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, struct iatt *prebuf, ++ struct iatt *postbuf, dict_t *xdata) { ++ inode_t *inode = NULL; ++ int64_t delta_blocks = 0; ++ shard_local_t *local = NULL; + +- /* Also, if the file is sharded, get the file size and block cnt xattr, +- * and store them in the stbuf appropriately. +- */ ++ local = frame->local; + +- if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && +- frame->root->pid != GF_CLIENT_PID_GSYNCD) +- shard_modify_size_and_block_count(buf, xdata); +- +- /* If this was a fresh lookup, there are two possibilities: +- * 1) If the file is sharded (indicated by the presence of block size +- * xattr), store this block size, along with rdev and mode in its +- * inode ctx. +- * 2) If the file is not sharded, store size along with rdev and mode +- * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is +- * already initialised to all zeroes, nothing more needs to be done. +- */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + +- (void)shard_inode_ctx_update(inode, this, xdata, buf); ++ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, ++ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" ++ " shard failed : %s", ++ uuid_utoa(inode->gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ local->postbuf.ia_size = local->offset; ++ /* Let the delta be negative. 
We want xattrop to do subtraction */ ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, ++ postbuf->ia_blocks - prebuf->ia_blocks); ++ GF_ASSERT(delta_blocks <= 0); ++ local->postbuf.ia_blocks += delta_blocks; ++ local->hole_size = 0; ++ ++ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ size_t last_shard_size_after = 0; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ /* A NULL inode could be due to the fact that the last shard which ++ * needs to be truncated does not exist due to it lying in a hole ++ * region. So the only thing left to do in that case would be an ++ * update to file size xattr. ++ */ ++ if (!inode) { ++ gf_msg_debug(this->name, 0, ++ "Last shard to be truncated absent" ++ " in backend: %s. Directly proceeding to update " ++ "file size", ++ uuid_utoa(inode->gfid)); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- LOCK(&priv->lock); +- { +- if (priv->first_lookup_done == _gf_false) { +- priv->first_lookup_done = _gf_true; +- i_start_cleanup = _gf_true; +- } +- } +- UNLOCK(&priv->lock); ++ SHARD_SET_ROOT_FS_ID(frame, local); + +- if (!i_start_cleanup) +- goto unwind; ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); + +- ret = shard_start_background_deletion(this); +- if (ret < 0) { +- LOCK(&priv->lock); +- { +- priv->first_lookup_done = _gf_false; +- } +- UNLOCK(&priv->lock); +- } ++ last_shard_size_after = (local->offset % local->block_size); + +-unwind: +- SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, +- postparent); +- return 0; ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, ++ NULL); ++ loc_wipe(&loc); ++ return 0; + } + +-int +-shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +-{ +- int ret = -1; +- int32_t op_errno = ENOMEM; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- this->itable = loc->inode->table; +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); +- } ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uint64_t block_count = 0; ++ shard_local_t *local = NULL; + +- frame->local = local; ++ local = frame->local; + +- loc_copy(&local->loc, loc); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } ++ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); ++ if (!ret) { ++ GF_ATOMIC_SUB(local->delta_blocks, block_count); ++ } else { ++ /* dict_get failed possibly due to a heterogeneous cluster? 
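The size bookkeeping for a shrinking truncate is entirely relative: the shard holding the new EOF is truncated to offset % block_size, and the negative byte and block deltas are later folded in through the xattrop shown earlier. A standalone check of that arithmetic, with an example block size (4 MiB is illustrative, not the volume's actual setting):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t block_size = 4ULL * 1024 * 1024;      /* example shard size */
    uint64_t old_size = 13ULL * 1024 * 1024;       /* current file size */
    uint64_t new_size = 6ULL * 1024 * 1024 + 5;    /* truncate target */

    /* The shard holding the new EOF is truncated to the remainder... */
    uint64_t last_shard_size_after = new_size % block_size;
    assert(last_shard_size_after == 2ULL * 1024 * 1024 + 5);

    /* ...and the delta sent to the size xattrop is negative. */
    int64_t delta_size = (int64_t)new_size - (int64_t)old_size;
    assert(delta_size < 0);
    return 0;
}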
*/ ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get key %s from dict during truncate of gfid %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ } ++ ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); ++ } ++ return 0; ++} ++ ++int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { ++ int i = 1; ++ int ret = -1; ++ int call_count = 0; ++ uint32_t cur_block = 0; ++ uint32_t last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ dict_t *xdata_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ cur_block = local->first_block + 1; ++ last_block = local->last_block; ++ ++ /* Determine call count */ ++ for (i = 1; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ call_count++; ++ } ++ ++ if (!call_count) { ++ /* Call count = 0 implies that all of the shards that need to be ++ * unlinked do not exist. So shard xlator would now proceed to ++ * do the final truncate + size updates. ++ */ ++ gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " ++ "truncate absent in backend: %s. Directly " ++ "proceeding to update file size", ++ uuid_utoa(inode->gfid)); ++ local->postbuf.ia_size = local->offset; ++ local->postbuf.ia_blocks = local->prebuf.ia_blocks; ++ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->hole_size = 0; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } + +- local->xattr_req = xattr_req ? 
dict_ref(xattr_req) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local->call_count = call_count; ++ i = 1; ++ xdata_req = dict_new(); ++ if (!xdata_req) { ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } ++ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set key %s into dict during truncate of %s", ++ GF_GET_FILE_BLOCK_COUNT, ++ uuid_utoa(local->resolver_base_inode->gfid)); ++ dict_unref(xdata_req); ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; ++ } + +- if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict" +- " value: key:%s for path %s", +- GF_XATTR_SHARD_BLOCK_SIZE, loc->path); +- goto err; +- } ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ while (cur_block <= last_block) { ++ if (!local->inode_list[i]) { ++ cur_block++; ++ i++; ++ continue; ++ } ++ if (wind_failed) { ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, +- 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s for path %s.", +- GF_XATTR_SHARD_FILE_SIZE, loc->path); +- goto err; +- } ++ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s. Base file gfid = %s", ++ bname, uuid_utoa(inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, ++ NULL, NULL, NULL); ++ goto next; + } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[i]); + +- if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) +- dict_del(xattr_req, GF_CONTENT_KEY); ++ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, ++ 0, xdata_req); ++ loc_wipe(&loc); ++ next: ++ i++; ++ cur_block++; ++ if (!--call_count) ++ break; ++ } ++ dict_unref(xdata_req); ++ return 0; ++} + +- STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); ++int shard_truncate_do(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->num_blocks == 1) { ++ /* This means that there are no shards to be unlinked. ++ * The fop boils down to truncating the last shard, updating ++ * the size and unwinding. 
++ */ ++ shard_truncate_last_shard(frame, this, local->inode_list[0]); + return 0; ++ } else { ++ shard_truncate_htol(frame, this, local->loc.inode); ++ } ++ return 0; + } + +-int +-shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int ret = -1; +- int32_t mask = SHARD_INODE_WRITE_MASK; +- shard_local_t *local = NULL; +- shard_inode_ctx_t ctx = { +- 0, +- }; +- +- local = frame->local; ++int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_BASE_FILE_LOOKUP_FAILED, +- "Lookup on base file" +- " failed : %s", +- loc_gfid_utoa(&(local->loc))); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++ local = frame->local; + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ shard_truncate_do(frame, this); ++ return 0; ++} ++ ++void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, ++ struct iatt *buf) { ++ int list_index = 0; ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *linked_inode = NULL; ++ xlator_t *this = NULL; ++ inode_t *fsync_inode = NULL; ++ shard_priv_t *priv = NULL; ++ inode_t *base_inode = NULL; ++ ++ this = THIS; ++ priv = this->private; ++ if (local->loc.inode) { ++ gf_uuid_copy(gfid, local->loc.inode->gfid); ++ base_inode = local->loc.inode; ++ } else if (local->resolver_base_inode) { ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ base_inode = local->resolver_base_inode; ++ } else { ++ gf_uuid_copy(gfid, local->base_gfid); ++ } ++ ++ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); ++ ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); ++ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); ++ inode_lookup(linked_inode); ++ list_index = block_num - local->first_block; ++ local->inode_list[list_index] = linked_inode; ++ ++ LOCK(&priv->lock); ++ { ++ fsync_inode = __shard_update_shards_inode_list(linked_inode, this, ++ base_inode, block_num, gfid); ++ } ++ UNLOCK(&priv->lock); ++ if (fsync_inode) ++ shard_initiate_evicted_inode_fsync(this, fsync_inode); ++} ++ ++int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ int call_count = 0; ++ int shard_block_num = (long)cookie; ++ uuid_t gfid = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ if (local->resolver_base_inode) ++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ if (op_ret < 0) { ++ /* Ignore absence of shards in the backend in truncate fop. 
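The high-to-low phase is a classic fan-out: one frame winds an unlink per existing shard, each callback subtracts that shard's reported block count from delta_blocks, and the callback that brings call_count to zero proceeds to truncate the first participant shard. A model of the completion accounting; in the patch the counter is maintained via shard_call_count_return() under frame->lock, and C11 atomics merely stand in here:

#include <stdatomic.h>
#include <stdbool.h>

/* Whichever callback drops the counter to zero runs the final step
 * exactly once. */
static bool unlink_cbk_is_last(atomic_int *call_count,
                               atomic_llong *delta_blocks,
                               long long shard_block_count)
{
    atomic_fetch_sub(delta_blocks, shard_block_count); /* GF_ATOMIC_SUB */
    return atomic_fetch_sub(call_count, 1) == 1;       /* previous == 1 */
}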
*/ ++ switch (local->fop) { ++ case GF_FOP_TRUNCATE: ++ case GF_FOP_FTRUNCATE: ++ case GF_FOP_RENAME: ++ case GF_FOP_UNLINK: ++ if (op_errno == ENOENT) ++ goto done; ++ break; ++ case GF_FOP_WRITE: ++ case GF_FOP_READ: ++ case GF_FOP_ZEROFILL: ++ case GF_FOP_DISCARD: ++ case GF_FOP_FALLOCATE: ++ if ((!local->first_lookup_done) && (op_errno == ENOENT)) { ++ LOCK(&frame->lock); ++ { local->create_count++; } ++ UNLOCK(&frame->lock); ++ goto done; ++ } ++ break; ++ default: ++ break; + } + +- if (shard_inode_ctx_get_all(inode, this, &ctx)) +- mask = SHARD_ALL_MASK; ++ /* else */ ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, ++ "Lookup on shard %d " ++ "failed. Base file gfid = %s", ++ shard_block_num, uuid_utoa(gfid)); ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +- ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, +- (mask | SHARD_MASK_REFRESH_RESET)); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, +- "Failed to set inode" +- " write params into inode ctx for %s", +- uuid_utoa(buf->ia_gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; +- } ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +-unwind: +- local->handler(frame, this); ++done: ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wake(&local->barrier); + return 0; ++ } else { ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ if (!local->first_lookup_done) ++ local->first_lookup_done = _gf_true; ++ local->pls_fop_handler(frame, this); ++ } ++ } ++ return 0; + } + +-int +-shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, +- shard_post_fop_handler_t handler) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- dict_t *xattr_req = NULL; +- gf_boolean_t need_refresh = _gf_false; ++dict_t *shard_create_gfid_dict(dict_t *dict) { ++ int ret = 0; ++ dict_t *new = NULL; ++ unsigned char *gfid = NULL; + +- local = frame->local; +- local->handler = handler; ++ new = dict_copy_with_ref(dict, NULL); ++ if (!new) ++ return NULL; + +- ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, +- &need_refresh); +- /* By this time, inode ctx should have been created either in create, +- * mknod, readdirp or lookup. If not it is a bug! 
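The switch at the top of shard_common_lookup_shards_cbk() encodes a per-fop policy for missing shards: destructive fops simply skip holes, while data fops on the first lookup pass record how many shards must be created. That policy, restated as a standalone classifier over the same GF_FOP_* values:

typedef enum { MISSING_OK, MISSING_CREATE, MISSING_FATAL } missing_t;

/* Model of the ENOENT handling above: whether a missing shard is
 * tolerable depends on the fop and on whether shards have been looked
 * up once already. */
static missing_t classify_missing_shard(glusterfs_fop_t fop,
                                        int first_lookup_done)
{
    switch (fop) {
    case GF_FOP_TRUNCATE: /* holes have nothing to unlink/move */
    case GF_FOP_FTRUNCATE:
    case GF_FOP_RENAME:
    case GF_FOP_UNLINK:
        return MISSING_OK;
    case GF_FOP_WRITE: /* the data path must materialize the shard */
    case GF_FOP_READ:
    case GF_FOP_ZEROFILL:
    case GF_FOP_DISCARD:
    case GF_FOP_FALLOCATE:
        return first_lookup_done ? MISSING_FATAL : MISSING_CREATE;
    default:
        return MISSING_FATAL;
    }
}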
+- */ +- if ((ret == 0) && (need_refresh == _gf_false)) { +- gf_msg_debug(this->name, 0, +- "Skipping lookup on base file: %s" +- "Serving prebuf off the inode ctx cache", +- uuid_utoa(loc->gfid)); +- goto out; ++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); ++ if (!gfid) { ++ ret = -1; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ++ ret = dict_set_gfuuid(new, "gfid-req", gfid, false); ++ ++out: ++ if (ret) { ++ dict_unref(new); ++ new = NULL; ++ GF_FREE(gfid); ++ } ++ ++ return new; ++} ++ ++int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ shard_post_lookup_shards_fop_handler_t handler) { ++ int i = 0; ++ int ret = 0; ++ int count = 0; ++ int call_count = 0; ++ int32_t shard_idx_iter = 0; ++ int last_block = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ char *bname = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ dict_t *xattr_req = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ count = call_count = local->call_count; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ local->pls_fop_handler = handler; ++ if (local->lookup_shards_barriered) ++ local->barrier.waitfor = local->call_count; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ i++; ++ shard_idx_iter++; ++ continue; ++ } ++ ++ if (wind_failed) { ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); ++ ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; + } + +- xattr_req = dict_new(); ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto out; ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, ++ -1, ENOMEM, NULL, NULL, NULL, NULL); ++ goto next; ++ } ++ ++ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ next: ++ shard_idx_iter++; ++ i++; ++ ++ if (!--call_count) ++ break; ++ } ++ if (local->lookup_shards_barriered) { ++ syncbarrier_wait(&local->barrier, count); ++ local->pls_fop_handler(frame, this); ++ } ++ return 0; ++} ++ ++int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ if 
(local->op_errno == ENOENT) { ++ /* If lookup on /.shard fails with ENOENT, it means that ++ * the file was 0-byte in size but truncated sometime in ++ * the past to a higher size which is reflected in the ++ * size xattr, and now being truncated to a lower size. ++ * In this case, the only thing that needs to be done is ++ * to update the size xattr of the file and unwind. ++ */ ++ local->first_block = local->last_block = 0; ++ local->num_blocks = 1; ++ local->call_count = 0; ++ local->op_ret = 0; ++ local->postbuf.ia_size = local->offset; ++ shard_update_file_size(frame, this, local->fd, &local->loc, ++ shard_post_update_size_truncate_handler); ++ return 0; ++ } else { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } ++ } + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); ++ if (!local->call_count) ++ shard_truncate_do(frame, this); ++ else ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_truncate_handler); ++ ++ return 0; ++} ++ ++int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ /* First participant block here is the lowest numbered block that would ++ * hold the last byte of the file post successful truncation. ++ * Last participant block is the block that contains the last byte in ++ * the current state of the file. ++ * If (first block == last_block): ++ * then that means that the file only needs truncation of the ++ * first (or last since both are same) block. ++ * Else ++ * if (new_size % block_size == 0) ++ * then that means there is no truncate to be done with ++ * only shards from first_block + 1 through the last ++ * block needing to be unlinked. ++ * else ++ * both truncate of the first block and unlink of the ++ * remaining shards until end of file is required. ++ */ ++ local->first_block = ++ (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, ++ local->block_size); ++ local->last_block = ++ get_highest_block(0, local->prebuf.ia_size, local->block_size); ++ ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = ++ (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; ++ ++ if ((local->first_block == 0) && (local->num_blocks == 1)) { ++ if (local->fop == GF_FOP_TRUNCATE) ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, ++ local->xattr_req); ++ else ++ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, ++ local->xattr_req); ++ return 0; ++ } + +- STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xattr_req); ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- dict_unref(xattr_req); +- return 0; ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_post_resolve_truncate_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ return 0; + +-out: +- if (xattr_req) +- dict_unref(xattr_req); +- handler(frame, this); +- return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ struct iatt tmp_stbuf = { ++ 0, ++ }; + +- local = frame->local; +- +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ local = frame->local; + +- SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ local->postbuf = tmp_stbuf = local->prebuf; ++ ++ if (local->prebuf.ia_size == local->offset) { ++ /* If the file size is same as requested size, unwind the call ++ * immediately. ++ */ ++ if (local->fop == GF_FOP_TRUNCATE) ++ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, ++ NULL); ++ else ++ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, ++ &local->postbuf, NULL); ++ } else if (local->offset > local->prebuf.ia_size) { ++ /* If the truncate is from a lower to a higher size, set the ++ * new size xattr and unwind. ++ */ ++ local->hole_size = local->offset - local->prebuf.ia_size; ++ local->delta_size = 0; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ local->postbuf.ia_size = local->offset; ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_update_file_size(frame, this, NULL, &local->loc, ++ shard_post_update_size_truncate_handler); ++ } else { ++ /* ... else ++ * i. unlink all shards that need to be unlinked. ++ * ii. truncate the last of the shards. ++ * iii. update the new size using setxattr. ++ * and unwind the fop. 
++ */ ++ local->hole_size = 0; ++ local->delta_size = (local->offset - local->prebuf.ia_size); ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ tmp_stbuf.ia_size = local->offset; ++ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, ++ SHARD_INODE_WRITE_MASK); ++ shard_truncate_begin(frame, this); ++ } ++ return 0; + } + +-int +-shard_post_stat_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++/* TO-DO: ++ * Fix updates to size and block count with racing write(s) and truncate(s). ++ */ + +- local = frame->local; ++int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ off_t offset, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- if (local->op_ret >= 0) +- shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, +- SHARD_LOOKUP_MASK); ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } + +- SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, +- &local->prebuf, local->xattr_rsp); ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +-} ++ } + +-int +-shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- shard_local_t *local = NULL; ++ if (!this->itable) ++ this->itable = loc->inode->table; + +- local = frame->local; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ loc_copy(&local->loc, loc); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->fop = GF_FOP_TRUNCATE; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->resolver_base_inode = loc->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; + +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, +- "stat failed: %s", +- local->fd ? 
uuid_utoa(local->fd->inode->gfid) +- : uuid_utoa((local->loc.inode)->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto unwind; +- } ++err: ++ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(fd->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); ++ return 0; ++ } ++ ++ if (!this->itable) ++ this->itable = fd->inode->table; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) ++ goto err; ++ local->fd = fd_ref(fd); ++ local->offset = offset; ++ local->block_size = block_size; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ local->fop = GF_FOP_FTRUNCATE; ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local->resolver_base_inode = fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_truncate_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); ++ return 0; ++} + +- local->prebuf = *buf; +- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { +- local->op_ret = -1; +- local->op_errno = EINVAL; +- goto unwind; +- } +- local->xattr_rsp = dict_ref(xdata); ++int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- if (local->loc.inode) +- inode = local->loc.inode; +- else +- inode = local->fd->inode; ++ local = frame->local; + +- shard_inode_ctx_invalidate(inode, this, &local->prebuf); ++ if (op_ret == -1) ++ goto unwind; ++ ++ ret = ++ shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); + + unwind: +- local->handler(frame, this); +- return 0; +-} ++ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); + +-int +-shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++ return 0; ++} + +- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, ++ dev_t rdev, mode_t umask, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, 
SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, xdata); +- return 0; +- } ++ frame->local = local; ++ local->block_size = priv->block_size; ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); ++ return 0; ++} + +- frame->local = local; ++int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ shard_local_t *local = NULL; + +- local->handler = shard_post_stat_handler; +- loc_copy(&local->loc, loc); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ local = frame->local; ++ if (op_ret < 0) ++ goto err; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, +- local, err); ++ shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); ++ buf->ia_size = local->prebuf.ia_size; ++ buf->ia_blocks = local->prebuf.ia_blocks; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); +- return 0; ++ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, ++ postparent, xdata); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ local = frame->local; + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++ if (local->op_ret < 0) { ++ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, ++ NULL, NULL, NULL); ++ return 0; ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, xdata); +- return 0; +- } ++ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, ++ local->xattr_req); ++ return 0; ++} + +- if (!this->itable) +- this->itable = fd->inode->table; ++int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; 
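
The hunks around this point reformat the same entry gate that shard_stat(), shard_fstat(), shard_truncate(), shard_ftruncate() and shard_link() all share: read the block size out of the inode ctx, and bypass shard logic entirely for unsharded files and for gsyncd traffic. A minimal sketch of that gate, assuming only what the hunks themselves show; shard_fop_continue() is a hypothetical stand-in for the fop-specific setup that follows:

    /* Recurring entry gate. A block size of 0 in the inode ctx marks an
     * unsharded file; gsyncd I/O is passed straight to the child xlator. */
    static int
    shard_fop_gate(call_frame_t *frame, xlator_t *this, inode_t *inode)
    {
        uint64_t block_size = 0;

        if (shard_inode_ctx_get_block_size(inode, this, &block_size))
            return -1; /* no ctx: callers log and unwind with an error */

        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD)
            return 1; /* wind unmodified to FIRST_CHILD(this) */

        return shard_fop_continue(frame, this, block_size); /* hypothetical */
    }
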
++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } + +- frame->local = local; ++ if (!block_size) { ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, ++ oldloc, newloc, xdata); ++ return 0; ++ } + +- local->handler = shard_post_fstat_handler; +- local->fd = fd_ref(fd); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++ if (!this->itable) ++ this->itable = oldloc->inode->table; + +- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, +- local, err); ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); +- return 0; ++ frame->local = local; ++ ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_link_handler); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); + +- local = frame->local; ++int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->postbuf, NULL); ++ local = frame->local; ++ ++ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { ++ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, ++ "failed to delete shards of %s", ++ uuid_utoa(local->resolver_base_inode->gfid)); + return 0; +-} ++ } ++ local->op_ret = 0; ++ local->op_errno = 0; + +-int +-shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *prebuf, struct iatt *postbuf, +- dict_t *xdata) +-{ +- inode_t *inode = NULL; +- int64_t delta_blocks = 0; +- shard_local_t *local = NULL; ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ return 0; ++} + +- local = frame->local; ++int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local = frame->local; ++ local->lookup_shards_barriered = _gf_true; + +- inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode +- : local->fd->inode; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, +- "truncate on last" +- " shard failed : %s", +- uuid_utoa(inode->gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } ++ if (!local->call_count) ++ shard_unlink_shards_do(frame, this, local->resolver_base_inode); ++ else ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_unlink_handler); ++ return 0; ++} ++ ++void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { ++ char block_bname[256] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ inode_t *inode = NULL; ++ inode_t *base_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_inode_ctx_t *base_ictx = NULL; ++ int unref_base_inode = 0; ++ int unref_shard_inode = 0; ++ ++ this = THIS; ++ priv = this->private; ++ ++ inode = local->inode_list[shard_block_num - local->first_block]; ++ shard_inode_ctx_get(inode, this, &ctx); ++ base_inode = ctx->base_inode; ++ if (base_inode) ++ gf_uuid_copy(gfid, base_inode->gfid); ++ else ++ gf_uuid_copy(gfid, ctx->base_gfid); ++ shard_make_block_bname(shard_block_num, gfid, block_bname, ++ sizeof(block_bname)); ++ ++ LOCK(&priv->lock); ++ if (base_inode) ++ LOCK(&base_inode->lock); ++ LOCK(&inode->lock); ++ { ++ __shard_inode_ctx_get(inode, this, &ctx); ++ if (!list_empty(&ctx->ilist)) { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; ++ unref_base_inode++; ++ unref_shard_inode++; ++ GF_ASSERT(priv->inode_count >= 0); ++ } ++ if (ctx->fsync_needed) { ++ unref_base_inode++; ++ unref_shard_inode++; ++ list_del_init(&ctx->to_fsync_list); ++ if (base_inode) { ++ __shard_inode_ctx_get(base_inode, this, &base_ictx); ++ base_ictx->fsync_count--; ++ } ++ } ++ } ++ UNLOCK(&inode->lock); ++ if (base_inode) ++ UNLOCK(&base_inode->lock); + +- local->postbuf.ia_size = local->offset; +- /* Let the delta be negative. We want xattrop to do subtraction */ +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, +- postbuf->ia_blocks - prebuf->ia_blocks); +- GF_ASSERT(delta_blocks <= 0); +- local->postbuf.ia_blocks += delta_blocks; +- local->hole_size = 0; ++ inode_unlink(inode, priv->dot_shard_inode, block_bname); ++ inode_ref_reduce_by_n(inode, unref_shard_inode); ++ inode_forget(inode, 0); + +- shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ if (base_inode && unref_base_inode) ++ inode_ref_reduce_by_n(base_inode, unref_base_inode); ++ UNLOCK(&priv->lock); + } + +-int +-shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- size_t last_shard_size_after = 0; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; ++int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- /* A NULL inode could be due to the fact that the last shard which +- * needs to be truncated does not exist due to it lying in a hole +- * region. So the only thing left to do in that case would be an +- * update to file size xattr. 
+- */ +- if (!inode) { +- gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } ++ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, ++ &local->prebuf, &local->preoldparent, ++ &local->postoldparent, &local->prenewparent, ++ &local->postnewparent, local->xattr_rsp); ++ return 0; ++} + +- SHARD_SET_ROOT_FS_ID(frame, local); ++int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = frame->local; + +- loc.inode = inode_ref(inode); +- gf_uuid_copy(loc.gfid, inode->gfid); ++ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, ++ &local->preoldparent, &local->postoldparent, ++ local->xattr_rsp); ++ return 0; ++} + +- last_shard_size_after = (local->offset % local->block_size); ++int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ shard_local_t *local = NULL; + +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, +- NULL); +- loc_wipe(&loc); +- return 0; +-} ++ local = frame->local; + +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num); ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto done; ++ } + +-int +-shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uint64_t block_count = 0; +- shard_local_t *local = NULL; ++ shard_unlink_block_inode(local, shard_block_num); ++done: ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, ++ inode_t *inode) { ++ int i = 0; ++ int ret = -1; ++ int count = 0; ++ uint32_t cur_block = 0; ++ uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ ++ char *bname = NULL; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ uuid_t gfid = { ++ 0, ++ }; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t wind_failed = _gf_false; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ if (inode) ++ gf_uuid_copy(gfid, inode->gfid); ++ else ++ gf_uuid_copy(gfid, local->base_gfid); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (!local->inode_list[i]) ++ continue; ++ count++; ++ } ++ ++ if (!count) { ++ /* callcount = 0 implies that all of the shards that need to be ++ * unlinked are non-existent (in other words the file is full of ++ * holes). 
++ */ ++ gf_msg_debug(this->name, 0, "All shards that need to be " ++ "unlinked are non-existent: %s", ++ uuid_utoa(gfid)); ++ return 0; ++ } + +- local = frame->local; ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ local->barrier.waitfor = count; ++ cur_block = cur_block_idx + local->first_block; + +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); +- if (!ret) { +- GF_ATOMIC_SUB(local->delta_blocks, block_count); +- } else { +- /* dict_get failed possibly due to a heterogeneous cluster? */ +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get key %s from dict during truncate of gfid %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- } +- return 0; +-} +- +-int +-shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 1; +- int ret = -1; +- int call_count = 0; +- uint32_t cur_block = 0; +- uint32_t last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- dict_t *xdata_req = NULL; +- +- local = frame->local; +- priv = this->private; +- +- cur_block = local->first_block + 1; +- last_block = local->last_block; +- +- /* Determine call count */ +- for (i = 1; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- call_count++; +- } ++ while (cur_block_idx < local->num_blocks) { ++ if (!local->inode_list[cur_block_idx]) ++ goto next; + +- if (!call_count) { +- /* Call count = 0 implies that all of the shards that need to be +- * unlinked do not exist. So shard xlator would now proceed to +- * do the final truncate + size updates. +- */ +- gf_msg_debug(this->name, 0, +- "Shards to be unlinked as part of " +- "truncate absent in backend: %s. 
Directly " +- "proceeding to update file size", +- uuid_utoa(inode->gfid)); +- local->postbuf.ia_size = local->offset; +- local->postbuf.ia_blocks = local->prebuf.ia_blocks; +- local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->hole_size = 0; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; ++ if (wind_failed) { ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- local->call_count = call_count; +- i = 1; +- xdata_req = dict_new(); +- if (!xdata_req) { +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +- } +- ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set key %s into dict during truncate of %s", +- GF_GET_FILE_BLOCK_COUNT, +- uuid_utoa(local->resolver_base_inode->gfid)); +- dict_unref(xdata_req); +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); ++ bname = strrchr(path, '/') + 1; ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on %s, base file gfid = %s", ++ bname, uuid_utoa(gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ loc_wipe(&loc); ++ wind_failed = _gf_true; ++ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, ++ ENOMEM, NULL, NULL, NULL); ++ goto next; + } + +- SHARD_SET_ROOT_FS_ID(frame, local); +- while (cur_block <= last_block) { +- if (!local->inode_list[i]) { +- cur_block++; +- i++; +- continue; +- } +- if (wind_failed) { +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s. 
Base file gfid = %s", +- bname, uuid_utoa(inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[i]); +- +- STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); +- loc_wipe(&loc); +- next: +- i++; +- cur_block++; +- if (!--call_count) +- break; +- } +- dict_unref(xdata_req); +- return 0; +-} ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ loc.inode = inode_ref(local->inode_list[cur_block_idx]); + +-int +-shard_truncate_do(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, ++ (void *)(long)cur_block, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, ++ local->xattr_req); ++ loc_wipe(&loc); ++ next: ++ cur_block++; ++ cur_block_idx++; ++ } ++ syncbarrier_wait(&local->barrier, count); ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ return 0; ++} ++ ++int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, ++ int now, int first_block, ++ gf_dirent_t *entry) { ++ int i = 0; ++ int ret = 0; ++ shard_local_t *local = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ ++ local = cleanup_frame->local; ++ ++ local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ return -ENOMEM; ++ ++ local->first_block = first_block; ++ local->last_block = first_block + now - 1; ++ local->num_blocks = now; ++ gf_uuid_parse(entry->d_name, gfid); ++ gf_uuid_copy(local->base_gfid, gfid); ++ local->resolver_base_inode = inode_find(this->itable, gfid); ++ local->call_count = 0; ++ ret = syncbarrier_init(&local->barrier); ++ if (ret) { ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ return -errno; ++ } ++ shard_common_resolve_shards(cleanup_frame, this, ++ shard_post_resolve_unlink_handler); ++ ++ for (i = 0; i < local->num_blocks; i++) { ++ if (local->inode_list[i]) ++ inode_unref(local->inode_list[i]); ++ } ++ GF_FREE(local->inode_list); ++ local->inode_list = NULL; ++ if (local->op_ret) ++ ret = -local->op_errno; ++ syncbarrier_destroy(&local->barrier); ++ inode_unref(local->resolver_base_inode); ++ local->resolver_base_inode = NULL; ++ STACK_RESET(cleanup_frame->root); ++ return ret; ++} ++ ++int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = 0; ++ int shard_count = 0; ++ int first_block = 0; ++ int now = 0; ++ uint64_t size = 0; ++ uint64_t block_size = 0; ++ uint64_t size_array[4] = { ++ 0, ++ }; ++ void *bsize = NULL; ++ void *size_attr = NULL; ++ dict_t *xattr_rsp = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = cleanup_frame->local; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed 
to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, ++ &xattr_rsp); ++ if (ret) ++ goto err; ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); ++ goto err; ++ } ++ block_size = ntoh64(*((uint64_t *)bsize)); ++ ++ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); ++ goto err; ++ } ++ ++ memcpy(size_array, size_attr, sizeof(size_array)); ++ size = ntoh64(size_array[0]); ++ ++ shard_count = (size / block_size) - 1; ++ if (shard_count < 0) { ++ gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ /* File size < shard-block-size, so nothing to delete */ ++ ret = 0; ++ goto delete_marker; ++ } ++ if ((size % block_size) > 0) ++ shard_count++; ++ ++ if (shard_count == 0) { ++ gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " ++ "its shard-block-size. Nothing to delete. " ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } ++ gf_msg_debug(this->name, 0, ++ "base file = %s, " ++ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " ++ "shard_count=%d", ++ entry->d_name, block_size, size, shard_count); ++ ++ /* Perform a gfid-based lookup to see if gfid corresponding to marker ++ * file's base name exists. ++ */ ++ loc_wipe(&loc); ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ gf_uuid_parse(entry->d_name, loc.gfid); ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (!ret) { ++ gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " ++ "%s is present. Skipping shard deletion. " ++ "Returning", ++ entry->d_name); ++ ret = 0; ++ goto delete_marker; ++ } + +- local = frame->local; ++ first_block = 1; + +- if (local->num_blocks == 1) { +- /* This means that there are no shards to be unlinked. +- * The fop boils down to truncating the last shard, updating +- * the size and unwinding. 
+- */ +- shard_truncate_last_shard(frame, this, local->inode_list[0]); +- return 0; ++ while (shard_count) { ++ if (shard_count < local->deletion_rate) { ++ now = shard_count; ++ shard_count = 0; + } else { +- shard_truncate_htol(frame, this, local->loc.inode); +- } +- return 0; +-} +- +-int +-shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; ++ now = local->deletion_rate; ++ shard_count -= local->deletion_rate; + } + +- shard_truncate_do(frame, this); +- return 0; +-} ++ gf_msg_debug(this->name, 0, "deleting %d shards starting from " ++ "block %d of gfid %s", ++ now, first_block, entry->d_name); ++ ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, ++ entry); ++ if (ret) ++ goto err; ++ first_block += now; ++ } + +-void +-shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, +- struct iatt *buf) +-{ +- int list_index = 0; +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *linked_inode = NULL; +- xlator_t *this = NULL; +- inode_t *fsync_inode = NULL; +- shard_priv_t *priv = NULL; +- inode_t *base_inode = NULL; +- +- this = THIS; +- priv = this->private; +- if (local->loc.inode) { +- gf_uuid_copy(gfid, local->loc.inode->gfid); +- base_inode = local->loc.inode; +- } else if (local->resolver_base_inode) { +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- base_inode = local->resolver_base_inode; ++delete_marker: ++ loc_wipe(&loc); ++ loc.inode = inode_ref(inode); ++ loc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); ++ if (ret) ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to delete %s " ++ "from /%s", ++ entry->d_name, GF_SHARD_REMOVE_ME_DIR); ++err: ++ if (xattr_rsp) ++ dict_unref(xattr_rsp); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, ++ gf_dirent_t *entry, inode_t *inode) { ++ int ret = -1; ++ loc_t loc = { ++ 0, ++ }; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ loc.inode = inode_ref(priv->dot_shard_rm_inode); ++ ++ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); ++ if (ret < 0) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ } ++ goto out; ++ } ++ { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } ++ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, ++ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, ++ shard_internal_dir_type_t type) { ++ int ret = 0; ++ char *bname = NULL; ++ loc_t *loc = NULL; ++ shard_priv_t *priv = NULL; ++ uuid_t gfid = { ++ 0, ++ }; ++ struct iatt stbuf = { ++ 0, ++ }; ++ ++ priv = this->private; ++ ++ switch (type) { ++ 
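
The while (shard_count) loop above throttles background cleanup: each pass of __shard_delete_shards_of_entry() hands at most deletion_rate shards to shard_regulated_shards_deletion(). The same loop reduced to its arithmetic; delete_batch() is a hypothetical stand-in for that helper:

    /* Batched deletion, mirroring the loop above. Block 0 is the base
     * file itself, so unlinking starts from block 1. */
    int first_block = 1;
    while (shard_count) {
        int now = (shard_count < deletion_rate) ? shard_count : deletion_rate;
        shard_count -= now;
        if (delete_batch(first_block, now) < 0) /* hypothetical helper */
            break;
        first_block += now;
    }
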
case SHARD_INTERNAL_DIR_DOT_SHARD: ++ loc = &local->dot_shard_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_gfid); ++ bname = GF_SHARD_DIR; ++ break; ++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: ++ loc = &local->dot_shard_rm_loc; ++ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); ++ bname = GF_SHARD_REMOVE_ME_DIR; ++ break; ++ default: ++ break; ++ } ++ ++ loc->inode = inode_find(this->itable, gfid); ++ if (!loc->inode) { ++ ret = shard_init_internal_dir_loc(this, local, type); ++ if (ret) ++ goto err; ++ ret = dict_reset(local->xattr_req); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, ++ "Failed to reset " ++ "dict"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); ++ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, ++ NULL); ++ if (ret < 0) { ++ if (ret != -ENOENT) ++ gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Lookup on %s failed, exiting", bname); ++ goto err; + } else { +- gf_uuid_copy(gfid, local->base_gfid); ++ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); + } ++ } ++ ret = 0; ++err: ++ return ret; ++} ++ ++int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, ++ gf_dirent_t *entry) { ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ loc.parent = inode_ref(local->fd->inode); ++ ++ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on %s", entry->d_name); ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; ++ ++ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); ++ if (ret < 0) { ++ goto err; ++ } ++ entry->inode = inode_ref(loc.inode); ++ ret = 0; ++err: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_delete_shards(void *opaque) { ++ int ret = 0; ++ off_t offset = 0; ++ loc_t loc = { ++ 0, ++ }; ++ inode_t *link_inode = NULL; ++ xlator_t *this = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ gf_dirent_t entries; ++ gf_dirent_t *entry = NULL; ++ call_frame_t *cleanup_frame = NULL; ++ gf_boolean_t done = _gf_false; ++ ++ this = THIS; ++ priv = this->private; ++ INIT_LIST_HEAD(&entries.list); ++ ++ cleanup_frame = opaque; ++ ++ local = mem_get0(this->local_pool); ++ if (!local) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create local to " ++ "delete shards"); ++ ret = -ENOMEM; ++ goto err; ++ } ++ cleanup_frame->local = local; ++ local->fop = GF_FOP_UNLINK; ++ ++ local->xattr_req = dict_new(); ++ if (!local->xattr_req) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ local->deletion_rate = priv->deletion_rate; ++ ++ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".shard absent. Nothing to" ++ " delete. 
Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } + +- shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); +- +- shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); +- linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); +- inode_lookup(linked_inode); +- list_index = block_num - local->first_block; +- local->inode_list[list_index] = linked_inode; +- ++ ret = shard_resolve_internal_dir(this, local, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ if (ret == -ENOENT) { ++ gf_msg_debug(this->name, 0, ".remove_me absent. " ++ "Nothing to delete. Exiting"); ++ ret = 0; ++ goto err; ++ } else if (ret < 0) { ++ goto err; ++ } ++ ++ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); ++ if (!local->fd) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ for (;;) { ++ offset = 0; + LOCK(&priv->lock); + { +- fsync_inode = __shard_update_shards_inode_list( +- linked_inode, this, base_inode, block_num, gfid); ++ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { ++ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; ++ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { ++ priv->bg_del_state = SHARD_BG_DELETION_NONE; ++ done = _gf_true; ++ } + } + UNLOCK(&priv->lock); +- if (fsync_inode) +- shard_initiate_evicted_inode_fsync(this, fsync_inode); +-} +- +-int +-shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, int32_t op_errno, +- inode_t *inode, struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- int call_count = 0; +- int shard_block_num = (long)cookie; +- uuid_t gfid = { +- 0, +- }; +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (local->resolver_base_inode) +- gf_uuid_copy(gfid, local->resolver_base_inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- if (op_ret < 0) { +- /* Ignore absence of shards in the backend in truncate fop. */ +- switch (local->fop) { +- case GF_FOP_TRUNCATE: +- case GF_FOP_FTRUNCATE: +- case GF_FOP_RENAME: +- case GF_FOP_UNLINK: +- if (op_errno == ENOENT) +- goto done; +- break; +- case GF_FOP_WRITE: +- case GF_FOP_READ: +- case GF_FOP_ZEROFILL: +- case GF_FOP_DISCARD: +- case GF_FOP_FALLOCATE: +- if ((!local->first_lookup_done) && (op_errno == ENOENT)) { +- LOCK(&frame->lock); +- { +- local->create_count++; +- } +- UNLOCK(&frame->lock); +- goto done; +- } +- break; +- default: +- break; +- } +- +- /* else */ +- gf_msg(this->name, GF_LOG_ERROR, op_errno, +- SHARD_MSG_LOOKUP_SHARD_FAILED, +- "Lookup on shard %d " +- "failed. 
Base file gfid = %s", +- shard_block_num, uuid_utoa(gfid)); +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- +- shard_link_block_inode(local, shard_block_num, inode, buf); +- +-done: +- if (local->lookup_shards_barriered) { +- syncbarrier_wake(&local->barrier); +- return 0; +- } else { +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- if (!local->first_lookup_done) +- local->first_lookup_done = _gf_true; +- local->pls_fop_handler(frame, this); +- } +- } +- return 0; +-} +- +-dict_t * +-shard_create_gfid_dict(dict_t *dict) +-{ +- int ret = 0; +- dict_t *new = NULL; +- unsigned char *gfid = NULL; +- +- new = dict_copy_with_ref(dict, NULL); +- if (!new) +- return NULL; +- +- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); +- if (!gfid) { +- ret = -1; +- goto out; +- } +- +- gf_uuid_generate(gfid); +- +- ret = dict_set_gfuuid(new, "gfid-req", gfid, false); +- +-out: +- if (ret) { +- dict_unref(new); +- new = NULL; +- GF_FREE(gfid); +- } +- +- return new; +-} ++ if (done) ++ break; ++ while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, ++ &entries, local->xattr_req, NULL))) { ++ if (ret > 0) ++ ret = 0; ++ list_for_each_entry(entry, &entries.list, list) { ++ offset = entry->d_off; + +-int +-shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, +- shard_post_lookup_shards_fop_handler_t handler) +-{ +- int i = 0; +- int ret = 0; +- int count = 0; +- int call_count = 0; +- int32_t shard_idx_iter = 0; +- int last_block = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- char *bname = NULL; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- gf_boolean_t wind_failed = _gf_false; +- dict_t *xattr_req = NULL; +- +- priv = this->private; +- local = frame->local; +- count = call_count = local->call_count; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- local->pls_fop_handler = handler; +- if (local->lookup_shards_barriered) +- local->barrier.waitfor = local->call_count; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); ++ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) ++ continue; + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- i++; +- shard_idx_iter++; ++ if (!entry->inode) { ++ ret = shard_lookup_marker_entry(this, local, entry); ++ if (ret < 0) + continue; + } ++ link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, ++ &entry->d_stat); + +- if (wind_failed) { +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); +- +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) 
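
Both the lookup and unlink paths in these hunks derive a shard's basename the same way: build the absolute path of block N under /.shard with shard_make_block_abspath(), then take everything past the final '/'. A sketch of that convention; the exact format string is an assumption inferred from the <base-gfid>.<block> layout, not copied from the tree:

    /* Block N of a file is assumed to live at /.shard/<base-gfid>.N */
    char path[PATH_MAX] = {0};
    char gfid_str[64] = {0};

    snprintf(path, sizeof(path), "/%s/%s.%d", GF_SHARD_DIR,
             uuid_utoa_r(base_gfid, gfid_str), block_num);
    bname = strrchr(path, '/') + 1; /* "<base-gfid>.<block_num>" */
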
+- loc.name++; +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, +- this, -1, ENOMEM, NULL, NULL, NULL, +- NULL); +- goto next; ++ gf_msg_debug(this->name, 0, "Initiating deletion of " ++ "shards of gfid %s", ++ entry->d_name); ++ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, ++ link_inode); ++ inode_unlink(link_inode, local->fd->inode, entry->d_name); ++ inode_unref(link_inode); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, -ret, ++ SHARD_MSG_SHARDS_DELETION_FAILED, ++ "Failed to clean up shards of gfid %s", entry->d_name); ++ continue; + } ++ gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, ++ "Deleted " ++ "shards of gfid=%s from backend", ++ entry->d_name); ++ } ++ gf_dirent_free(&entries); ++ if (ret) ++ break; ++ } ++ } ++ ret = 0; ++ loc_wipe(&loc); ++ return ret; + +- STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- next: +- shard_idx_iter++; +- i++; +- +- if (!--call_count) +- break; +- } +- if (local->lookup_shards_barriered) { +- syncbarrier_wait(&local->barrier, count); +- local->pls_fop_handler(frame, this); +- } +- return 0; ++err: ++ LOCK(&priv->lock); ++ { priv->bg_del_state = SHARD_BG_DELETION_NONE; } ++ UNLOCK(&priv->lock); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. 
Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->inodelk_frame; ++ lk_local = lk_frame->local; ++ local->inodelk_frame = NULL; ++ loc = &local->int_inodelk.loc; ++ lock = &lk_local->int_inodelk; ++ lock->flock.l_type = F_UNLCK; ++ ++ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, ++ &lock->flock, NULL); ++ local->int_inodelk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata); ++int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ loc_t *dst_loc = NULL; ++ loc_t tmp_loc = { ++ 0, ++ }; ++ shard_local_t *local = frame->local; ++ ++ if (local->dst_block_size) { ++ tmp_loc.parent = inode_ref(local->loc2.parent); ++ ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ " on pargfid=%s bname=%s", ++ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ ++ tmp_loc.name = strrchr(tmp_loc.path, '/'); ++ if (tmp_loc.name) ++ tmp_loc.name++; ++ dst_loc = &tmp_loc; ++ } else { ++ dst_loc = &local->loc2; ++ } ++ ++ /* To-Do: Request open-fd count on dst base file */ ++ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, ++ local->xattr_req); ++ loc_wipe(&tmp_loc); ++ return 0; ++err: ++ loc_wipe(&tmp_loc); ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); ++ ++int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, ++ dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Xattrop on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, ++ local->newloc.name); ++ ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; ++} ++ ++int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ dict_t *xdata = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ xdata = dict_new(); ++ if (!xdata) ++ goto err; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ 
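
The marker file under /.shard/.remove_me is stamped here with the block-size and file-size values (via the GF_XATTROP_GET_AND_SET wind just below) precisely so that __shard_delete_shards_of_entry() can later size its work without the base file. The shard-count arithmetic that consumes those two values, as in that function:

    /* Number of numbered shards beyond the base file (block 0).
     * Example with block_size = 64MB: a 700MB file spans blocks 0..10,
     * so shards 1..10 need unlinking. */
    int shard_count = (size / block_size) - 1;
    if (shard_count < 0) {
        /* file never outgrew its first block: nothing to unlink */
    } else if (size % block_size) {
        shard_count++;
    }
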
SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->xattrop, &local->newloc, ++ GF_XATTROP_GET_AND_SET, xdata, NULL); ++ dict_unref(xdata); ++ return 0; ++err: ++ if (xdata) ++ dict_unref(xdata); ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- if (local->op_errno == ENOENT) { +- /* If lookup on /.shard fails with ENOENT, it means that +- * the file was 0-byte in size but truncated sometime in +- * the past to a higher size which is reflected in the +- * size xattr, and now being truncated to a lower size. +- * In this case, the only thing that needs to be done is +- * to update the size xattr of the file and unwind. +- */ +- local->first_block = local->last_block = 0; +- local->num_blocks = 1; +- local->call_count = 0; +- local->op_ret = 0; +- local->postbuf.ia_size = local->offset; +- shard_update_file_size(frame, this, local->fd, &local->loc, +- shard_post_update_size_truncate_handler); +- return 0; +- } else { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- } +- +- if (!local->call_count) +- shard_truncate_do(frame, this); +- else +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_truncate_handler); +- +- return 0; ++int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, ++ struct iatt *buf, dict_t *xdata, ++ struct iatt *postparent) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ if (op_ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Lookup on marker file failed " ++ "while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; ++ } ++ ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; ++ shard_set_size_attrs_on_marker_file(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); ++ return 0; + } + +-int +-shard_truncate_begin(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- /* First participant block here is the lowest numbered block that would +- * hold the last byte of the file post successful truncation. +- * Last participant block is the block that contains the last byte in +- * the current state of the file. +- * If (first block == last_block): +- * then that means that the file only needs truncation of the +- * first (or last since both are same) block. +- * Else +- * if (new_size % block_size == 0) +- * then that means there is no truncate to be done with +- * only shards from first_block + 1 through the last +- * block needing to be unlinked. +- * else +- * both truncate of the first block and unlink of the +- * remaining shards until end of file is required. +- */ +- local->first_block = (local->offset == 0) +- ? 
0 +- : get_lowest_block(local->offset - 1, +- local->block_size); +- local->last_block = get_highest_block(0, local->prebuf.ia_size, +- local->block_size); +- +- local->num_blocks = local->last_block - local->first_block + 1; +- local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) +- ? local->loc.inode +- : local->fd->inode; +- +- if ((local->first_block == 0) && (local->num_blocks == 1)) { +- if (local->fop == GF_FOP_TRUNCATE) +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, &local->loc, +- local->offset, local->xattr_req); +- else +- STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, local->fd, +- local->offset, local->xattr_req); +- return 0; +- } ++int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { ++ int op_errno = ENOMEM; ++ dict_t *xattr_req = NULL; ++ shard_local_t *local = NULL; + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ local = frame->local; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, +- shard_post_resolve_truncate_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_truncate_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; + ++ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, op_errno); ++ return 0; + } + +-int +-shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- struct iatt tmp_stbuf = { +- 0, +- }; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- local->postbuf = tmp_stbuf = local->prebuf; +- +- if (local->prebuf.ia_size == local->offset) { +- /* If the file size is same as requested size, unwind the call +- * immediately. +- */ +- if (local->fop == GF_FOP_TRUNCATE) +- SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- else +- SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, +- &local->postbuf, NULL); +- } else if (local->offset > local->prebuf.ia_size) { +- /* If the truncate is from a lower to a higher size, set the +- * new size xattr and unwind. 
+- */ +- local->hole_size = local->offset - local->prebuf.ia_size; +- local->delta_size = 0; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- local->postbuf.ia_size = local->offset; +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_update_file_size(frame, this, NULL, &local->loc, +- shard_post_update_size_truncate_handler); ++int shard_create_marker_file_under_remove_me_cbk( ++ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, ++ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ inode_t *linked_inode = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ if (op_ret < 0) { ++ if ((op_errno != EEXIST) && (op_errno != ENODATA)) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Marker file creation " ++ "failed while performing %s; entry gfid=%s", ++ gf_fop_string(local->fop), local->newloc.name); ++ goto err; + } else { +- /* ... else +- * i. unlink all shards that need to be unlinked. +- * ii. truncate the last of the shards. +- * iii. update the new size using setxattr. +- * and unwind the fop. +- */ +- local->hole_size = 0; +- local->delta_size = (local->offset - local->prebuf.ia_size); +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- tmp_stbuf.ia_size = local->offset; +- shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, +- SHARD_INODE_WRITE_MASK); +- shard_truncate_begin(frame, this); +- } +- return 0; +-} +- +-/* TO-DO: +- * Fix updates to size and block count with racing write(s) and truncate(s). +- */ +- +-int +-shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; ++ shard_lookup_marker_file(frame, this); ++ return 0; + } ++ } + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = loc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- loc_copy(&local->loc, loc); +- local->offset = offset; +- local->block_size = block_size; +- local->fop = GF_FOP_TRUNCATE; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->resolver_base_inode = loc->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; ++ linked_inode = ++ inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); ++ inode_unref(local->newloc.inode); ++ local->newloc.inode = linked_inode; + ++ if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ else if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = fd->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->offset = offset; +- local->block_size = block_size; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- local->fop = GF_FOP_FTRUNCATE; ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++} ++ ++int shard_create_marker_file_under_remove_me(call_frame_t *frame, ++ xlator_t *this, loc_t *loc) { ++ int ret = 0; ++ int op_errno = ENOMEM; ++ uint64_t bs = 0; ++ char g1[64] = { ++ 0, ++ }; ++ char g2[64] = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; ++ ++ priv = this->private; ++ local = frame->local; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) ++ goto err; ++ ++ local->newloc.inode = inode_new(this->itable); ++ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); ++ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), ++ (char **)&local->newloc.path); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed on " ++ "pargfid=%s bname=%s", ++ uuid_utoa_r(priv->dot_shard_rm_gfid, g1), ++ uuid_utoa_r(loc->inode->gfid, g2)); ++ goto err; ++ } ++ local->newloc.name = strrchr(local->newloc.path, '/'); ++ if (local->newloc.name) ++ local->newloc.name++; ++ ++ if (local->fop == GF_FOP_UNLINK) ++ bs = local->block_size; ++ else if (local->fop == GF_FOP_RENAME) ++ bs = local->dst_block_size; ++ ++ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, ++ local->prebuf.ia_size, 0, err); ++ ++ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, ++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, ++ 0, 0, 0644, xattr_req); ++ dict_unref(xattr_req); ++ return 0; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); +- local->resolver_base_inode 
= fd->inode; +- GF_ATOMIC_INIT(local->delta_blocks, 0); +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_truncate_handler); +- return 0; + err: +- shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); +- return 0; ++ if (xattr_req) ++ dict_unref(xattr_req); ++ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, ++ NULL, NULL, NULL, NULL, NULL); ++ return 0; + } + +-int +-shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, buf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + +- return 0; +-} ++int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, int32_t op_errno, ++ struct iatt *preparent, struct iatt *postparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +-int +-shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, +- dev_t rdev, mode_t umask, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; ++ local = frame->local; + +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } else { ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ local->preoldparent = *preparent; ++ local->postoldparent = *postparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } + +- frame->local = local; +- local->block_size = priv->block_size; +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; + } ++ } + +- STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); +- return 0; +-} +- +-int32_t +-shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- if (op_ret < 0) +- goto err; +- +- shard_inode_ctx_set(inode, this, buf, 0, +- SHARD_MASK_NLINK | SHARD_MASK_TIMES); +- buf->ia_size = local->prebuf.ia_size; +- buf->ia_blocks = local->prebuf.ia_blocks; +- +- SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, +- postparent, xdata); +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } ++ ++ shard_unlink_cbk(frame, this); ++ return 0; ++} ++ ++int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { 
++ shard_local_t *local = frame->local; ++ ++ /* To-Do: Request open-fd count on base file */ ++ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, ++ local->xattr_req); ++ return 0; ++} ++ ++int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ if (op_ret) ++ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, ++ "Unlock failed. Please check brick logs for " ++ "more details"); ++ SHARD_STACK_DESTROY(frame); ++ return 0; ++} ++ ++int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_entrylk_t *lock = NULL; ++ ++ local = frame->local; ++ lk_frame = local->entrylk_frame; ++ lk_local = lk_frame->local; ++ local->entrylk_frame = NULL; ++ lock = &lk_local->int_entrylk; ++ loc = &lock->loc; ++ ++ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->entrylk, this->name, loc, ++ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, ++ NULL); ++ local->int_entrylk.acquired_lock = _gf_false; ++ return 0; ++} ++ ++int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_create_marker_file_under_remove_me(frame, this, ++ &local->int_inodelk.loc); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-entrylk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_entrylk.acquired_lock = _gf_true; ++ shard_post_entrylk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, ++ uuid_t gfid) { ++ char gfid_str[GF_UUID_BUF_SIZE] = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ shard_local_t *entrylk_local = NULL; ++ shard_entrylk_t *int_entrylk = NULL; ++ call_frame_t *entrylk_frame = NULL; ++ ++ local = frame->local; ++ entrylk_frame = create_frame(this, this->ctx->pool); ++ if (!entrylk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock marker file"); ++ goto err; ++ } ++ ++ entrylk_local = mem_get0(this->local_pool); ++ if (!entrylk_local) { ++ STACK_DESTROY(entrylk_frame->root); ++ goto err; ++ } ++ ++ entrylk_frame->local = entrylk_local; ++ entrylk_local->main_frame = frame; ++ int_entrylk = &entrylk_local->int_entrylk; ++ ++ int_entrylk->loc.inode = inode_ref(inode); ++ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); ++ local->entrylk_frame = entrylk_frame; ++ gf_uuid_unparse(gfid, gfid_str); ++ int_entrylk->basename = gf_strdup(gfid_str); ++ ++ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), ++ 
FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, ++ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, +- NULL, NULL, NULL, NULL); +- return 0; +- } +- +- STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, +- local->xattr_req); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int32_t +-shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (!block_size) { +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, +- oldloc, newloc, xdata); +- return 0; +- } +- +- if (!this->itable) +- this->itable = oldloc->inode->table; +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_link_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); +- +-int +-shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { +- gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, +- "failed to delete shards of %s", +- uuid_utoa(local->resolver_base_inode->gfid)); +- return 0; +- } +- local->op_ret = 0; +- local->op_errno = 0; +- +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- return 0; +-} +- +-int +-shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- local->lookup_shards_barriered = _gf_true; +- +- if (!local->call_count) +- shard_unlink_shards_do(frame, this, local->resolver_base_inode); +- else +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_unlink_handler); +- return 0; +-} +- +-void +-shard_unlink_block_inode(shard_local_t *local, int shard_block_num) +-{ +- char block_bname[256] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- inode_t *inode = NULL; +- inode_t *base_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t *ctx = NULL; +- shard_inode_ctx_t *base_ictx = NULL; +- int unref_base_inode = 0; +- int unref_shard_inode = 0; +- +- this = THIS; +- priv = this->private; +- +- inode = local->inode_list[shard_block_num - local->first_block]; +- shard_inode_ctx_get(inode, this, &ctx); +- base_inode = ctx->base_inode; +- if 
(base_inode) +- gf_uuid_copy(gfid, base_inode->gfid); +- else +- gf_uuid_copy(gfid, ctx->base_gfid); +- shard_make_block_bname(shard_block_num, gfid, block_bname, +- sizeof(block_bname)); +- +- LOCK(&priv->lock); +- if (base_inode) +- LOCK(&base_inode->lock); +- LOCK(&inode->lock); +- { +- __shard_inode_ctx_get(inode, this, &ctx); +- if (!list_empty(&ctx->ilist)) { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- unref_base_inode++; +- unref_shard_inode++; +- GF_ASSERT(priv->inode_count >= 0); +- } +- if (ctx->fsync_needed) { +- unref_base_inode++; +- unref_shard_inode++; +- list_del_init(&ctx->to_fsync_list); +- if (base_inode) { +- __shard_inode_ctx_get(base_inode, this, &base_ictx); +- base_ictx->fsync_count--; +- } +- } +- } +- UNLOCK(&inode->lock); +- if (base_inode) +- UNLOCK(&base_inode->lock); +- +- inode_unlink(inode, priv->dot_shard_inode, block_bname); +- inode_ref_reduce_by_n(inode, unref_shard_inode); +- inode_forget(inode, 0); +- +- if (base_inode && unref_base_inode) +- inode_ref_reduce_by_n(base_inode, unref_base_inode); +- UNLOCK(&priv->lock); +-} +- +-int +-shard_rename_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, +- &local->prebuf, &local->preoldparent, +- &local->postoldparent, &local->prenewparent, +- &local->postnewparent, local->xattr_rsp); +- return 0; +-} +- +-int32_t +-shard_unlink_cbk(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, +- &local->preoldparent, &local->postoldparent, +- local->xattr_rsp); +- return 0; +-} +- +-int +-shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto done; +- } +- +- shard_unlink_block_inode(local, shard_block_num); +-done: +- syncbarrier_wake(&local->barrier); +- return 0; +-} +- +-int +-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) +-{ +- int i = 0; +- int ret = -1; +- int count = 0; +- uint32_t cur_block = 0; +- uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ +- char *bname = NULL; +- char path[PATH_MAX] = { +- 0, +- }; +- uuid_t gfid = { +- 0, +- }; +- loc_t loc = { +- 0, +- }; +- gf_boolean_t wind_failed = _gf_false; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (inode) +- gf_uuid_copy(gfid, inode->gfid); +- else +- gf_uuid_copy(gfid, local->base_gfid); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (!local->inode_list[i]) +- continue; +- count++; +- } +- +- if (!count) { +- /* callcount = 0 implies that all of the shards that need to be +- * unlinked are non-existent (in other words the file is full of +- * holes). 
+- */ +- gf_msg_debug(this->name, 0, +- "All shards that need to be " +- "unlinked are non-existent: %s", +- uuid_utoa(gfid)); +- return 0; +- } +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- local->barrier.waitfor = count; +- cur_block = cur_block_idx + local->first_block; +- +- while (cur_block_idx < local->num_blocks) { +- if (!local->inode_list[cur_block_idx]) +- goto next; +- +- if (wind_failed) { +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); +- bname = strrchr(path, '/') + 1; +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on %s, base file gfid = %s", +- bname, uuid_utoa(gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- loc_wipe(&loc); +- wind_failed = _gf_true; +- shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, +- ENOMEM, NULL, NULL, NULL); +- goto next; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- loc.inode = inode_ref(local->inode_list[cur_block_idx]); +- +- STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, +- (void *)(long)cur_block, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, +- local->xattr_req); +- loc_wipe(&loc); +- next: +- cur_block++; +- cur_block_idx++; +- } +- syncbarrier_wait(&local->barrier, count); +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- return 0; +-} +- +-int +-shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, +- int now, int first_block, gf_dirent_t *entry) +-{ +- int i = 0; +- int ret = 0; +- shard_local_t *local = NULL; +- uuid_t gfid = { +- 0, +- }; +- +- local = cleanup_frame->local; +- +- local->inode_list = GF_CALLOC(now, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- return -ENOMEM; +- +- local->first_block = first_block; +- local->last_block = first_block + now - 1; +- local->num_blocks = now; +- gf_uuid_parse(entry->d_name, gfid); +- gf_uuid_copy(local->base_gfid, gfid); +- local->resolver_base_inode = inode_find(this->itable, gfid); +- local->call_count = 0; +- ret = syncbarrier_init(&local->barrier); +- if (ret) { +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- return -errno; +- } +- shard_common_resolve_shards(cleanup_frame, this, +- shard_post_resolve_unlink_handler); +- +- for (i = 0; i < local->num_blocks; i++) { +- if (local->inode_list[i]) +- inode_unref(local->inode_list[i]); +- } +- GF_FREE(local->inode_list); +- local->inode_list = NULL; +- if (local->op_ret) +- ret = -local->op_errno; +- syncbarrier_destroy(&local->barrier); +- inode_unref(local->resolver_base_inode); +- local->resolver_base_inode = NULL; +- STACK_RESET(cleanup_frame->root); +- return ret; +-} +- +-int +-__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = 0; +- int shard_count = 0; +- int first_block = 0; +- int now = 0; +- uint64_t size = 0; +- uint64_t block_size = 0; +- uint64_t size_array[4] = { +- 0, +- }; +- void *bsize = NULL; +- void *size_attr = NULL; +- dict_t *xattr_rsp = NULL; +- loc_t loc = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = 
cleanup_frame->local; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset dict"); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, +- &xattr_rsp); +- if (ret) +- goto err; +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); +- goto err; +- } +- block_size = ntoh64(*((uint64_t *)bsize)); +- +- ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); +- goto err; +- } +- +- memcpy(size_array, size_attr, sizeof(size_array)); +- size = ntoh64(size_array[0]); +- +- shard_count = (size / block_size) - 1; +- if (shard_count < 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s hasn't grown beyond " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- /* File size < shard-block-size, so nothing to delete */ +- ret = 0; +- goto delete_marker; +- } +- if ((size % block_size) > 0) +- shard_count++; +- +- if (shard_count == 0) { +- gf_msg_debug(this->name, 0, +- "Size of %s is exactly equal to " +- "its shard-block-size. Nothing to delete. " +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- gf_msg_debug(this->name, 0, +- "base file = %s, " +- "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 +- ", " +- "shard_count=%d", +- entry->d_name, block_size, size, shard_count); +- +- /* Perform a gfid-based lookup to see if gfid corresponding to marker +- * file's base name exists. +- */ +- loc_wipe(&loc); +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- gf_uuid_parse(entry->d_name, loc.gfid); +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (!ret) { +- gf_msg_debug(this->name, 0, +- "Base shard corresponding to gfid " +- "%s is present. Skipping shard deletion. 
" +- "Returning", +- entry->d_name); +- ret = 0; +- goto delete_marker; +- } +- +- first_block = 1; +- +- while (shard_count) { +- if (shard_count < local->deletion_rate) { +- now = shard_count; +- shard_count = 0; +- } else { +- now = local->deletion_rate; +- shard_count -= local->deletion_rate; +- } +- +- gf_msg_debug(this->name, 0, +- "deleting %d shards starting from " +- "block %d of gfid %s", +- now, first_block, entry->d_name); +- ret = shard_regulated_shards_deletion(cleanup_frame, this, now, +- first_block, entry); +- if (ret) +- goto err; +- first_block += now; +- } +- +-delete_marker: +- loc_wipe(&loc); +- loc.inode = inode_ref(inode); +- loc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); +- if (ret) +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to delete %s " +- "from /%s", +- entry->d_name, GF_SHARD_REMOVE_ME_DIR); +-err: +- if (xattr_rsp) +- dict_unref(xattr_rsp); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, +- gf_dirent_t *entry, inode_t *inode) +-{ +- int ret = -1; +- loc_t loc = { +- 0, +- }; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- loc.inode = inode_ref(priv->dot_shard_rm_inode); +- +- ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); +- if (ret < 0) { +- if (ret == -EAGAIN) { +- ret = 0; +- } +- goto out; +- } +- { +- ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); +- } +- syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, +- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +-out: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) +-{ +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, +- shard_internal_dir_type_t type) +-{ +- int ret = 0; +- char *bname = NULL; +- loc_t *loc = NULL; +- shard_priv_t *priv = NULL; +- uuid_t gfid = { +- 0, +- }; +- struct iatt stbuf = { +- 0, +- }; +- +- priv = this->private; +- +- switch (type) { +- case SHARD_INTERNAL_DIR_DOT_SHARD: +- loc = &local->dot_shard_loc; +- gf_uuid_copy(gfid, priv->dot_shard_gfid); +- bname = GF_SHARD_DIR; +- break; +- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: +- loc = &local->dot_shard_rm_loc; +- gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); +- bname = GF_SHARD_REMOVE_ME_DIR; +- break; +- default: +- break; +- } +- +- loc->inode = inode_find(this->itable, gfid); +- if (!loc->inode) { +- ret = shard_init_internal_dir_loc(this, local, type); +- if (ret) +- goto err; +- ret = dict_reset(local->xattr_req); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, +- "Failed to reset " +- "dict"); +- ret = -ENOMEM; +- goto err; +- } +- ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); +- ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, +- local->xattr_req, NULL); +- if (ret < 0) { +- if (ret != -ENOENT) +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Lookup on %s failed, exiting", bname); +- goto 
err; +- } else { +- shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); +- } +- } +- ret = 0; +-err: +- return ret; +-} +- +-int +-shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, +- gf_dirent_t *entry) +-{ +- int ret = 0; +- loc_t loc = { +- 0, +- }; +- +- loc.inode = inode_new(this->itable); +- if (!loc.inode) { +- ret = -ENOMEM; +- goto err; +- } +- loc.parent = inode_ref(local->fd->inode); +- +- ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on %s", entry->d_name); +- ret = -ENOMEM; +- goto err; +- } +- +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); +- if (ret < 0) { +- goto err; +- } +- entry->inode = inode_ref(loc.inode); +- ret = 0; +-err: +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_delete_shards(void *opaque) +-{ +- int ret = 0; +- off_t offset = 0; +- loc_t loc = { +- 0, +- }; +- inode_t *link_inode = NULL; +- xlator_t *this = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- gf_dirent_t entries; +- gf_dirent_t *entry = NULL; +- call_frame_t *cleanup_frame = NULL; +- gf_boolean_t done = _gf_false; +- +- this = THIS; +- priv = this->private; +- INIT_LIST_HEAD(&entries.list); +- +- cleanup_frame = opaque; +- +- local = mem_get0(this->local_pool); +- if (!local) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create local to " +- "delete shards"); +- ret = -ENOMEM; +- goto err; +- } +- cleanup_frame->local = local; +- local->fop = GF_FOP_UNLINK; +- +- local->xattr_req = dict_new(); +- if (!local->xattr_req) { +- ret = -ENOMEM; +- goto err; +- } +- local->deletion_rate = priv->deletion_rate; +- +- ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".shard absent. Nothing to" +- " delete. Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- ret = shard_resolve_internal_dir(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- if (ret == -ENOENT) { +- gf_msg_debug(this->name, 0, +- ".remove_me absent. " +- "Nothing to delete. 
Exiting"); +- ret = 0; +- goto err; +- } else if (ret < 0) { +- goto err; +- } +- +- local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); +- if (!local->fd) { +- ret = -ENOMEM; +- goto err; +- } +- +- for (;;) { +- offset = 0; +- LOCK(&priv->lock); +- { +- if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { +- priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; +- } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- done = _gf_true; +- } +- } +- UNLOCK(&priv->lock); +- if (done) +- break; +- while ( +- (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, +- &entries, local->xattr_req, NULL))) { +- if (ret > 0) +- ret = 0; +- list_for_each_entry(entry, &entries.list, list) +- { +- offset = entry->d_off; +- +- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) +- continue; +- +- if (!entry->inode) { +- ret = shard_lookup_marker_entry(this, local, entry); +- if (ret < 0) +- continue; +- } +- link_inode = inode_link(entry->inode, local->fd->inode, +- entry->d_name, &entry->d_stat); +- +- gf_msg_debug(this->name, 0, +- "Initiating deletion of " +- "shards of gfid %s", +- entry->d_name); +- ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, +- link_inode); +- inode_unlink(link_inode, local->fd->inode, entry->d_name); +- inode_unref(link_inode); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, -ret, +- SHARD_MSG_SHARDS_DELETION_FAILED, +- "Failed to clean up shards of gfid %s", +- entry->d_name); +- continue; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, +- SHARD_MSG_SHARD_DELETION_COMPLETED, +- "Deleted " +- "shards of gfid=%s from backend", +- entry->d_name); +- } +- gf_dirent_free(&entries); +- if (ret) +- break; +- } +- } +- ret = 0; +- loc_wipe(&loc); +- return ret; +- +-err: +- LOCK(&priv->lock); +- { +- priv->bg_del_state = SHARD_BG_DELETION_NONE; +- } +- UNLOCK(&priv->lock); +- loc_wipe(&loc); +- return ret; +-} +- +-int +-shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. 
Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->inodelk_frame; +- lk_local = lk_frame->local; +- local->inodelk_frame = NULL; +- loc = &local->int_inodelk.loc; +- lock = &lk_local->int_inodelk; +- lock->flock.l_type = F_UNLCK; +- +- STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, +- &lock->flock, NULL); +- local->int_inodelk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata); +-int +-shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- loc_t *dst_loc = NULL; +- loc_t tmp_loc = { +- 0, +- }; +- shard_local_t *local = frame->local; +- +- if (local->dst_block_size) { +- tmp_loc.parent = inode_ref(local->loc2.parent); +- ret = inode_path(tmp_loc.parent, local->loc2.name, +- (char **)&tmp_loc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- " on pargfid=%s bname=%s", +- uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- +- tmp_loc.name = strrchr(tmp_loc.path, '/'); +- if (tmp_loc.name) +- tmp_loc.name++; +- dst_loc = &tmp_loc; +- } else { +- dst_loc = &local->loc2; +- } +- +- /* To-Do: Request open-fd count on dst base file */ +- STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, +- local->xattr_req); +- loc_wipe(&tmp_loc); +- return 0; +-err: +- loc_wipe(&tmp_loc); +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this); +- +-int +-shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, dict_t *dict, +- dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Xattrop on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, +- local->newloc.name); +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- dict_t *xdata = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- xdata = dict_new(); +- if (!xdata) +- goto err; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = 
local->dst_block_size; +- SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, +- local->prebuf.ia_size, 0, err); +- STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, +- &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); +- dict_unref(xdata); +- return 0; +-err: +- if (xdata) +- dict_unref(xdata); +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, dict_t *xdata, +- struct iatt *postparent) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- if (op_ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Lookup on marker file failed " +- "while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- shard_set_size_attrs_on_marker_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); +- return 0; +-} +- +-int +-shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) +-{ +- int op_errno = ENOMEM; +- dict_t *xattr_req = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); +- dict_unref(xattr_req); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me_cbk( +- call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, +- int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- inode_t *linked_inode = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- priv = this->private; +- +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (op_ret < 0) { +- if ((op_errno != EEXIST) && (op_errno != ENODATA)) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Marker file creation " +- "failed while performing %s; entry gfid=%s", +- gf_fop_string(local->fop), local->newloc.name); +- goto err; +- } else { +- shard_lookup_marker_file(frame, this); +- return 0; +- } +- } +- +- linked_inode = inode_link(inode, priv->dot_shard_rm_inode, +- local->newloc.name, buf); +- inode_unref(local->newloc.inode); +- local->newloc.inode = linked_inode; +- +- if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- else if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +-} +- +-int +-shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, +- loc_t *loc) +-{ +- int ret = 0; +- int op_errno = ENOMEM; +- uint64_t bs = 0; +- char g1[64] = { +- 0, +- }; +- char g2[64] = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- shard_priv_t *priv = NULL; +- shard_local_t *local = 
NULL; +- +- priv = this->private; +- local = frame->local; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) +- goto err; +- +- local->newloc.inode = inode_new(this->itable); +- local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); +- ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), +- (char **)&local->newloc.path); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed on " +- "pargfid=%s bname=%s", +- uuid_utoa_r(priv->dot_shard_rm_gfid, g1), +- uuid_utoa_r(loc->inode->gfid, g2)); +- goto err; +- } +- local->newloc.name = strrchr(local->newloc.path, '/'); +- if (local->newloc.name) +- local->newloc.name++; +- +- if (local->fop == GF_FOP_UNLINK) +- bs = local->block_size; +- else if (local->fop == GF_FOP_RENAME) +- bs = local->dst_block_size; +- +- SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, +- local->prebuf.ia_size, 0, err); +- +- STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, +- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, +- &local->newloc, 0, 0, 0644, xattr_req); +- dict_unref(xattr_req); +- return 0; +- +-err: +- if (xattr_req) +- dict_unref(xattr_req); +- shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, +- NULL, NULL, NULL, NULL, NULL); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); +- +-int +-shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, +- struct iatt *preparent, struct iatt *postparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->preoldparent = *preparent; +- local->postoldparent = *postparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } +- +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- +- shard_unlink_cbk(frame, this); +- return 0; +-} +- +-int +-shard_unlink_base_file(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = frame->local; +- +- /* To-Do: Request open-fd count on base file */ +- STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, +- local->xattr_req); +- return 0; +-} +- +-int +-shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- if (op_ret) +- gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, +- "Unlock failed. 
Please check brick logs for " +- "more details"); +- SHARD_STACK_DESTROY(frame); +- return 0; +-} +- +-int +-shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_entrylk_t *lock = NULL; +- +- local = frame->local; +- lk_frame = local->entrylk_frame; +- lk_local = lk_frame->local; +- local->entrylk_frame = NULL; +- lock = &lk_local->int_entrylk; +- loc = &lock->loc; +- +- STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, loc, +- lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, +- NULL); +- local->int_entrylk.acquired_lock = _gf_false; +- return 0; +-} +- +-int +-shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_create_marker_file_under_remove_me(frame, this, +- &local->int_inodelk.loc); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-entrylk handler not defined. This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_entrylk.acquired_lock = _gf_true; +- shard_post_entrylk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, +- uuid_t gfid) +-{ +- char gfid_str[GF_UUID_BUF_SIZE] = { +- 0, +- }; +- shard_local_t *local = NULL; +- shard_local_t *entrylk_local = NULL; +- shard_entrylk_t *int_entrylk = NULL; +- call_frame_t *entrylk_frame = NULL; +- +- local = frame->local; +- entrylk_frame = create_frame(this, this->ctx->pool); +- if (!entrylk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock marker file"); +- goto err; +- } +- +- entrylk_local = mem_get0(this->local_pool); +- if (!entrylk_local) { +- STACK_DESTROY(entrylk_frame->root); +- goto err; +- } +- +- entrylk_frame->local = entrylk_local; +- entrylk_local->main_frame = frame; +- int_entrylk = &entrylk_local->int_entrylk; +- +- int_entrylk->loc.inode = inode_ref(inode); +- set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); +- local->entrylk_frame = entrylk_frame; +- gf_uuid_unparse(gfid, gfid_str); +- int_entrylk->basename = gf_strdup(gfid_str); +- +- STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, +- int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; +- +- priv = this->private; +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, 
local->op_errno); +- return 0; +- } +- +- if (local->prebuf.ia_nlink > 1) { +- gf_msg_debug(this->name, 0, +- "link count on %s > 1:%d, " +- "performing rename()/unlink()", +- local->int_inodelk.loc.path, local->prebuf.ia_nlink); +- if (local->fop == GF_FOP_RENAME) +- shard_rename_src_base_file(frame, this); +- else if (local->fop == GF_FOP_UNLINK) +- shard_unlink_base_file(frame, this); +- } else { +- gf_msg_debug(this->name, 0, +- "link count on %s = 1, creating " +- "file under .remove_me", +- local->int_inodelk.loc.path); +- local->cleanup_required = _gf_true; +- shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, +- local->prebuf.ia_gfid); +- } +- return 0; +-} +- +-int +-shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- switch (local->fop) { +- case GF_FOP_UNLINK: +- case GF_FOP_RENAME: +- shard_lookup_base_file(frame, this, &local->int_inodelk.loc, +- shard_post_lookup_base_shard_rm_handler); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, +- "post-inodelk handler not defined. This case should not" +- " be hit"); +- break; +- } +- return 0; +-} +- +-int +-shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *xdata) +-{ +- call_frame_t *main_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *main_local = NULL; +- +- local = frame->local; +- main_frame = local->main_frame; +- main_local = main_frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(main_local->fop, main_frame, op_ret, +- op_errno); +- return 0; +- } +- main_local->int_inodelk.acquired_lock = _gf_true; +- shard_post_inodelk_fop_handler(main_frame, this); +- return 0; +-} +- +-int +-shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) +-{ +- call_frame_t *lk_frame = NULL; +- shard_local_t *local = NULL; +- shard_local_t *lk_local = NULL; +- shard_inodelk_t *int_inodelk = NULL; +- +- local = frame->local; +- lk_frame = create_frame(this, this->ctx->pool); +- if (!lk_frame) { +- gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, +- "Failed to create new frame " +- "to lock base shard"); +- goto err; +- } +- lk_local = mem_get0(this->local_pool); +- if (!lk_local) { +- STACK_DESTROY(lk_frame->root); +- goto err; +- } +- +- lk_frame->local = lk_local; +- lk_local->main_frame = frame; +- int_inodelk = &lk_local->int_inodelk; +- +- int_inodelk->flock.l_len = 0; +- int_inodelk->flock.l_start = 0; +- int_inodelk->domain = this->name; +- int_inodelk->flock.l_type = F_WRLCK; +- loc_copy(&local->int_inodelk.loc, loc); +- set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); +- local->inodelk_frame = lk_frame; +- +- STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, +- &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- loc_t *loc = NULL; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- if (local->fop == GF_FOP_UNLINK) +- loc = &local->loc; +- else if (local->fop == GF_FOP_RENAME) +- loc = &local->loc2; +- shard_acquire_inodelk(frame, this, loc); +- return 
0; +-} +- +-int +-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, +- shard_post_resolve_fop_handler_t handler, +- shard_internal_dir_type_t type); +-int +-shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); +- return 0; +- } +- shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- return 0; +-} +- +-void +-shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = frame->local; +- +- local->dot_shard_rm_loc.inode = inode_find(this->itable, +- priv->dot_shard_rm_gfid); +- if (!local->dot_shard_rm_loc.inode) { +- local->dot_shard_loc.inode = inode_find(this->itable, +- priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_pre_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- } else { +- local->post_res_handler = shard_post_mkdir_rm_handler; +- shard_refresh_internal_dir(frame, this, +- SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); +- } +-} +- +-int +-shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; +- +- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(loc->inode->gfid)); +- goto err; +- } +- +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- +- loc_copy(&local->loc, loc); +- local->xflag = xflag; +- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); +- local->block_size = block_size; +- local->resolver_base_inode = loc->inode; +- local->fop = GF_FOP_UNLINK; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- +- local->resolve_not = _gf_true; +- shard_begin_rm_resolution(frame, this); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_rename_cbk(frame, this); +- return 0; +-} +- +-int +-shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iatt *buf, +- struct iatt *preoldparent, struct iatt *postoldparent, +- struct iatt *prenewparent, struct iatt *postnewparent, +- dict_t *xdata) +-{ +- int ret = 0; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto err; +- } +- /* Set ctx->refresh to TRUE to force a lookup on disk when +- * shard_lookup_base_file() is called next to refresh the hard link +- * count in ctx. Note that this is applicable only to the case where +- * the rename dst is already existent and sharded. 
+- */ +- if ((local->dst_block_size) && (!local->cleanup_required)) +- shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); +- +- local->prebuf = *buf; +- local->preoldparent = *preoldparent; +- local->postoldparent = *postoldparent; +- local->prenewparent = *prenewparent; +- local->postnewparent = *postnewparent; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- +- if (local->dst_block_size) { +- if (local->entrylk_frame) { +- ret = shard_unlock_entrylk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- } +- } +- +- ret = shard_unlock_inodelk(frame, this); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = -ret; +- goto err; +- } +- if (local->cleanup_required) +- shard_start_background_deletion(this); +- } +- +- /* Now the base file of src, if sharded, is looked up to gather ia_size +- * and ia_blocks.*/ +- if (local->block_size) { +- local->tmp_loc.inode = inode_new(this->itable); +- gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); +- shard_lookup_base_file(frame, this, &local->tmp_loc, +- shard_post_rename_lookup_handler); +- } else { +- shard_rename_cbk(frame, this); +- } +- return 0; +-err: +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +-} +- +-int +-shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); +- return 0; +- } +- +- /* Save dst base file attributes into postbuf so the information is not +- * lost when it is overwritten after lookup on base file of src in +- * shard_lookup_base_file_cbk(). +- */ +- local->postbuf = local->prebuf; +- shard_rename_src_base_file(frame, this); +- return 0; +-} +- +-int +-shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, +- dict_t *xdata) +-{ +- int ret = -1; +- uint64_t block_size = 0; +- uint64_t dst_block_size = 0; +- shard_local_t *local = NULL; +- +- if (IA_ISDIR(oldloc->inode->ia_type)) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); +- if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size from inode ctx of %s", +- uuid_utoa(oldloc->inode->gfid)); +- goto err; +- } +- +- if (newloc->inode) +- ret = shard_inode_ctx_get_block_size(newloc->inode, this, +- &dst_block_size); +- +- /* The following stack_wind covers the case where: +- * a. the src file is not sharded and dst doesn't exist, OR +- * b. the src and dst both exist but are not sharded. +- */ +- if (((!block_size) && (!dst_block_size)) || +- frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +- return 0; +- } +- +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- loc_copy(&local->loc, oldloc); +- loc_copy(&local->loc2, newloc); +- local->resolver_base_inode = newloc->inode; +- local->fop = GF_FOP_RENAME; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; +- +- local->block_size = block_size; +- local->dst_block_size = dst_block_size; +- if (!this->itable) +- this->itable = (local->loc.inode)->table; +- local->resolve_not = _gf_true; +- +- /* The following if-block covers the case where the dst file exists +- * and is sharded. +- */ +- if (local->dst_block_size) { +- shard_begin_rm_resolution(frame, this); +- } else { +- /* The following block covers the case where the dst either doesn't +- * exist or is NOT sharded but the src is sharded. In this case, shard +- * xlator would go ahead and rename src to dst. Once done, it would also +- * lookup the base shard of src to get the ia_size and ia_blocks xattr +- * values. +- */ +- shard_rename_src_base_file(frame, this); +- } +- return 0; +- +-err: +- shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, +- struct iatt *stbuf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int ret = -1; +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (op_ret == -1) +- goto unwind; +- +- ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, +- SHARD_ALL_MASK); +- if (ret) +- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, +- "Failed to set inode " +- "ctx for %s", +- uuid_utoa(inode->gfid)); +- +-unwind: +- SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, +- preparent, postparent, xdata); +- return 0; +-} +- +-int +-shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +-{ +- shard_priv_t *priv = NULL; +- shard_local_t *local = NULL; +- +- priv = this->private; +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; +- +- frame->local = local; +- local->block_size = priv->block_size; +- +- if (!__is_gsyncd_on_shard_dir(frame, loc)) { +- SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); +- } +- +- STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, +- xdata); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); +- return 0; +-} +- +-int +-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +-{ +- /* To-Do: Handle open with O_TRUNC under locks */ +- SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); +- return 0; +-} +- +-int +-shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, +- fd_t *fd, dict_t *xdata) +-{ +- STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +-} +- +-int +-shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, struct iovec *vector, +- int32_t count, struct iatt *stbuf, struct iobref *iobref, +- dict_t *xdata) +-{ +- int i = 0; +- int call_count = 0; +- void *address = NULL; +- uint64_t block_num = 0; +- off_t off = 0; +- struct iovec vec = { +- 0, +- }; +- shard_local_t *local = NULL; +- fd_t *anon_fd = cookie; +- shard_inode_ctx_t *ctx = NULL; +- +- local = frame->local; +- +- /* If shard has already seen a failure here before, there is no point +- * in aggregating subsequent reads, so just go to out. 
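+- * (Illustration, values hypothetical: for a read spanning
+- * num_blocks = 4 shards, a failure on the shard-2 read makes the
+- * remaining callbacks skip the memcpy aggregation below and only
+- * drain call_count, so the FOP unwinds once with the first error.)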
+- */ +- if (local->op_ret < 0) +- goto out; +- +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- goto out; +- } +- +- if (local->op_ret >= 0) +- local->op_ret += op_ret; +- +- shard_inode_ctx_get(anon_fd->inode, this, &ctx); +- block_num = ctx->block_num; +- +- if (block_num == local->first_block) { +- address = local->iobuf->ptr; +- } else { +- /* else +- * address to start writing to = beginning of buffer + +- * number of bytes until end of first block + +- * + block_size times number of blocks +- * between the current block and the first +- */ +- address = (char *)local->iobuf->ptr + +- (local->block_size - (local->offset % local->block_size)) + +- ((block_num - local->first_block - 1) * local->block_size); +- } +- +- for (i = 0; i < count; i++) { +- address = (char *)address + off; +- memcpy(address, vector[i].iov_base, vector[i].iov_len); +- off += vector[i].iov_len; +- } +- +-out: +- if (anon_fd) +- fd_unref(anon_fd); +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- } else { +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, +- &vec, 1, &local->prebuf, local->iobref, +- local->xattr_rsp); +- return 0; +- } +- } +- +- return 0; +-} +- +-int +-shard_readv_do(call_frame_t *frame, xlator_t *this) +-{ +- int i = 0; +- int call_count = 0; +- int last_block = 0; +- int cur_block = 0; +- off_t orig_offset = 0; +- off_t shard_offset = 0; +- size_t read_size = 0; +- size_t remaining_size = 0; +- fd_t *fd = NULL; +- fd_t *anon_fd = NULL; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- +- local = frame->local; +- fd = local->fd; +- +- orig_offset = local->offset; +- cur_block = local->first_block; +- last_block = local->last_block; +- remaining_size = local->total_size; +- local->call_count = call_count = local->num_blocks; +- +- SHARD_SET_ROOT_FS_ID(frame, local); +- +- if (fd->flags & O_DIRECT) +- local->flags = O_DIRECT; +- +- while (cur_block <= last_block) { +- if (wind_failed) { +- shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, +- 0, NULL, NULL, NULL); +- goto next; +- } +- +- shard_offset = orig_offset % local->block_size; +- read_size = local->block_size - shard_offset; +- if (read_size > remaining_size) +- read_size = remaining_size; +- +- remaining_size -= read_size; +- +- if (cur_block == 0) { +- anon_fd = fd_ref(fd); +- } else { +- anon_fd = fd_anonymous(local->inode_list[i]); +- if (!anon_fd) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, +- ENOMEM, NULL, 0, NULL, NULL, NULL); +- goto next; +- } +- } ++int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; + +- STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, anon_fd, read_size, +- shard_offset, local->flags, local->xattr_req); ++ priv = this->private; ++ local = frame->local; + +- orig_offset += read_size; +- next: +- cur_block++; +- i++; +- call_count--; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, 
local->op_errno); + return 0; ++ } ++ ++ if (local->prebuf.ia_nlink > 1) { ++ gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " ++ "performing rename()/unlink()", ++ local->int_inodelk.loc.path, local->prebuf.ia_nlink); ++ if (local->fop == GF_FOP_RENAME) ++ shard_rename_src_base_file(frame, this); ++ else if (local->fop == GF_FOP_UNLINK) ++ shard_unlink_base_file(frame, this); ++ } else { ++ gf_msg_debug(this->name, 0, "link count on %s = 1, creating " ++ "file under .remove_me", ++ local->int_inodelk.loc.path); ++ local->cleanup_required = _gf_true; ++ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, ++ local->prebuf.ia_gfid); ++ } ++ return 0; ++} ++ ++int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ switch (local->fop) { ++ case GF_FOP_UNLINK: ++ case GF_FOP_RENAME: ++ shard_lookup_base_file(frame, this, &local->int_inodelk.loc, ++ shard_post_lookup_base_shard_rm_handler); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, ++ "post-inodelk handler not defined. This case should not" ++ " be hit"); ++ break; ++ } ++ return 0; ++} ++ ++int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) { ++ call_frame_t *main_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *main_local = NULL; ++ ++ local = frame->local; ++ main_frame = local->main_frame; ++ main_local = main_frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); ++ return 0; ++ } ++ main_local->int_inodelk.acquired_lock = _gf_true; ++ shard_post_inodelk_fop_handler(main_frame, this); ++ return 0; ++} ++ ++int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { ++ call_frame_t *lk_frame = NULL; ++ shard_local_t *local = NULL; ++ shard_local_t *lk_local = NULL; ++ shard_inodelk_t *int_inodelk = NULL; ++ ++ local = frame->local; ++ lk_frame = create_frame(this, this->ctx->pool); ++ if (!lk_frame) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, ++ "Failed to create new frame " ++ "to lock base shard"); ++ goto err; ++ } ++ lk_local = mem_get0(this->local_pool); ++ if (!lk_local) { ++ STACK_DESTROY(lk_frame->root); ++ goto err; ++ } ++ ++ lk_frame->local = lk_local; ++ lk_local->main_frame = frame; ++ int_inodelk = &lk_local->int_inodelk; ++ ++ int_inodelk->flock.l_len = 0; ++ int_inodelk->flock.l_start = 0; ++ int_inodelk->domain = this->name; ++ int_inodelk->flock.l_type = F_WRLCK; ++ loc_copy(&local->int_inodelk.loc, loc); ++ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); ++ local->inodelk_frame = lk_frame; ++ ++ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, ++ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); ++ return 0; ++err: ++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, inode_t *inode, +- struct iatt *buf, struct iatt *preparent, +- struct iatt *postparent, dict_t *xdata) +-{ +- int shard_block_num = (long)cookie; +- int call_count = 0; +- shard_local_t *local = NULL; ++int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ loc_t *loc = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ 
local = frame->local; + +- if (op_ret < 0) { +- if (op_errno == EEXIST) { +- LOCK(&frame->lock); +- { +- local->eexist_count++; +- } +- UNLOCK(&frame->lock); +- } else { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } +- gf_msg_debug(this->name, 0, +- "mknod of shard %d " +- "failed: %s", +- shard_block_num, strerror(op_errno)); +- goto done; +- } ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); ++ return 0; ++ } ++ if (local->fop == GF_FOP_UNLINK) ++ loc = &local->loc; ++ else if (local->fop == GF_FOP_RENAME) ++ loc = &local->loc2; ++ shard_acquire_inodelk(frame, this, loc); ++ return 0; ++} + +- shard_link_block_inode(local, shard_block_num, inode, buf); ++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, ++ shard_post_resolve_fop_handler_t handler, ++ shard_internal_dir_type_t type); ++int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +-done: +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- local->create_count = 0; +- local->post_mknod_handler(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; ++ } ++ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ return 0; + } + +-int +-shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, +- shard_post_mknod_fop_handler_t post_mknod_handler) +-{ +- int i = 0; +- int shard_idx_iter = 0; +- int last_block = 0; +- int ret = 0; +- int call_count = 0; +- char path[PATH_MAX] = { +- 0, +- }; +- mode_t mode = 0; +- char *bname = NULL; +- shard_priv_t *priv = NULL; +- shard_inode_ctx_t ctx_tmp = { +- 0, +- }; +- shard_local_t *local = NULL; +- gf_boolean_t wind_failed = _gf_false; +- fd_t *fd = NULL; +- loc_t loc = { +- 0, +- }; +- dict_t *xattr_req = NULL; +- +- local = frame->local; +- priv = this->private; +- fd = local->fd; +- shard_idx_iter = local->first_block; +- last_block = local->last_block; +- call_count = local->call_count = local->create_count; +- local->post_mknod_handler = post_mknod_handler; ++void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- SHARD_SET_ROOT_FS_ID(frame, local); ++ priv = this->private; ++ local = frame->local; + +- ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get inode " +- "ctx for %s", +- uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); ++ local->dot_shard_rm_loc.inode = ++ inode_find(this->itable, priv->dot_shard_rm_gfid); ++ if (!local->dot_shard_rm_loc.inode) { ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, ++ SHARD_INTERNAL_DIR_DOT_SHARD); ++ } else { ++ local->post_res_handler = shard_pre_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); ++ } ++ } else { ++ local->post_res_handler = shard_post_mkdir_rm_handler; ++ shard_refresh_internal_dir(frame, this, ++ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); ++ } ++} ++ ++int shard_unlink(call_frame_t *frame, 
xlator_t *this, loc_t *loc, int xflag, ++ dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ ++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(loc->inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ ++ loc_copy(&local->loc, loc); ++ local->xflag = xflag; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ local->block_size = block_size; ++ local->resolver_base_inode = loc->inode; ++ local->fop = GF_FOP_UNLINK; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ ++ local->resolve_not = _gf_true; ++ shard_begin_rm_resolution(frame, this); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); ++ return 0; ++} + +- while (shard_idx_iter <= last_block) { +- if (local->inode_list[i]) { +- shard_idx_iter++; +- i++; +- continue; +- } ++int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { ++ shard_rename_cbk(frame, this); ++ return 0; ++} + +- if (wind_failed) { +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, struct iatt *buf, ++ struct iatt *preoldparent, struct iatt *postoldparent, ++ struct iatt *prenewparent, struct iatt *postnewparent, ++ dict_t *xdata) { ++ int ret = 0; ++ shard_local_t *local = NULL; + +- shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, +- sizeof(path)); +- +- xattr_req = shard_create_gfid_dict(local->xattr_req); +- if (!xattr_req) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ local = frame->local; + +- bname = strrchr(path, '/') + 1; +- loc.inode = inode_new(this->itable); +- loc.parent = inode_ref(priv->dot_shard_inode); +- ret = inode_path(loc.parent, bname, (char **)&(loc.path)); +- if (ret < 0 || !(loc.inode)) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, +- "Inode path failed" +- "on %s, base file gfid = %s", +- bname, uuid_utoa(fd->inode->gfid)); +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- wind_failed = _gf_true; +- loc_wipe(&loc); +- dict_unref(xattr_req); +- shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, +- -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); +- goto next; +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ /* Set ctx->refresh to TRUE to force a lookup on disk when ++ * shard_lookup_base_file() is called next to refresh the hard link ++ * count in ctx. Note that this is applicable only to the case where ++ * the rename dst is already existent and sharded. 
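++ * (Illustration, values hypothetical: if dst's cached ia_nlink were
++ * still 2 from before this rename, the next call into
++ * shard_post_lookup_base_shard_rm_handler() could take the
++ * "link count > 1" branch on stale data; the refresh flag forces the
++ * count to be re-read from disk instead.)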
++ */ ++ if ((local->dst_block_size) && (!local->cleanup_required)) ++ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); ++ ++ local->prebuf = *buf; ++ local->preoldparent = *preoldparent; ++ local->postoldparent = *postoldparent; ++ local->prenewparent = *prenewparent; ++ local->postnewparent = *postnewparent; ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); + +- loc.name = strrchr(loc.path, '/'); +- if (loc.name) +- loc.name++; +- +- STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, +- (void *)(long)shard_idx_iter, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->mknod, &loc, mode, +- ctx_tmp.stat.ia_rdev, 0, xattr_req); +- loc_wipe(&loc); +- dict_unref(xattr_req); +- +- next: +- shard_idx_iter++; +- i++; +- if (!--call_count) +- break; ++ if (local->dst_block_size) { ++ if (local->entrylk_frame) { ++ ret = shard_unlock_entrylk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ } + } + +- return 0; ++ ret = shard_unlock_inodelk(frame, this); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = -ret; ++ goto err; ++ } ++ if (local->cleanup_required) ++ shard_start_background_deletion(this); ++ } ++ ++ /* Now the base file of src, if sharded, is looked up to gather ia_size ++ * and ia_blocks.*/ ++ if (local->block_size) { ++ local->tmp_loc.inode = inode_new(this->itable); ++ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); ++ shard_lookup_base_file(frame, this, &local->tmp_loc, ++ shard_post_rename_lookup_handler); ++ } else { ++ shard_rename_cbk(frame, this); ++ } ++ return 0; + err: +- /* +- * This block is for handling failure in shard_inode_ctx_get_all(). +- * Failures in the while-loop are handled within the loop. +- */ +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- post_mknod_handler(frame, this); +- return 0; ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); +- +-int +-shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; +- +- local = frame->local; +- +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (local->create_count) { +- shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ local = frame->local; + ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ /* Save dst base file attributes into postbuf so the information is not ++ * lost when it is overwritten after lookup on base file of src in ++ * shard_lookup_base_file_cbk(). 
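++ * (In other words: prebuf holds dst's iatt from the lookup that just
++ * completed, and the upcoming lookup on src reuses prebuf, so dst's
++ * attributes are parked in postbuf for the final rename unwind.)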
++ */ ++ local->postbuf = local->prebuf; ++ shard_rename_src_base_file(frame, this); ++ return 0; ++} ++ ++int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, ++ loc_t *newloc, dict_t *xdata) { ++ int ret = -1; ++ uint64_t block_size = 0; ++ uint64_t dst_block_size = 0; ++ shard_local_t *local = NULL; ++ ++ if (IA_ISDIR(oldloc->inode->ia_type)) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); ++ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block " ++ "size from inode ctx of %s", ++ uuid_utoa(oldloc->inode->gfid)); ++ goto err; ++ } ++ ++ if (newloc->inode) ++ ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); ++ ++ /* The following stack_wind covers the case where: ++ * a. the src file is not sharded and dst doesn't exist, OR ++ * b. the src and dst both exist but are not sharded. ++ */ ++ if (((!block_size) && (!dst_block_size)) || ++ frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ loc_copy(&local->loc, oldloc); ++ loc_copy(&local->loc2, newloc); ++ local->resolver_base_inode = newloc->inode; ++ local->fop = GF_FOP_RENAME; ++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto err; ++ ++ local->block_size = block_size; ++ local->dst_block_size = dst_block_size; ++ if (!this->itable) ++ this->itable = (local->loc.inode)->table; ++ local->resolve_not = _gf_true; ++ ++ /* The following if-block covers the case where the dst file exists ++ * and is sharded. ++ */ ++ if (local->dst_block_size) { ++ shard_begin_rm_resolution(frame, this); ++ } else { ++ /* The following block covers the case where the dst either doesn't ++ * exist or is NOT sharded but the src is sharded. In this case, shard ++ * xlator would go ahead and rename src to dst. Once done, it would also ++ * lookup the base shard of src to get the ia_size and ia_blocks xattr ++ * values. 
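++ * (Example, sizes hypothetical: with a 64MB shard block size, a 1GB
++ * src keeps only block 0 in the base file, so its true ia_size and
++ * ia_blocks come from the size xattr on that base file; hence the
++ * extra lookup once the rename completes.)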
++ */ ++ shard_rename_src_base_file(frame, this); ++ } ++ return 0; ++ ++err: ++ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, ++ struct iatt *stbuf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int ret = -1; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } ++ if (op_ret == -1) ++ goto unwind; + +- if (!local->eexist_count) { +- shard_readv_do(frame, this); +- } else { +- local->call_count = local->eexist_count; +- shard_common_lookup_shards(frame, this, local->loc.inode, +- shard_post_lookup_shards_readv_handler); +- } +- return 0; ++ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, ++ SHARD_ALL_MASK); ++ if (ret) ++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, ++ "Failed to set inode " ++ "ctx for %s", ++ uuid_utoa(inode->gfid)); ++ ++unwind: ++ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, ++ preparent, postparent, xdata); ++ return 0; + } + +-int +-shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ++ shard_priv_t *priv = NULL; ++ shard_local_t *local = NULL; + +- local = frame->local; ++ priv = this->private; ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; + +- if (local->op_ret < 0) { +- if (local->op_errno != ENOENT) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; +- } else { +- struct iovec vec = { +- 0, +- }; +- +- vec.iov_base = local->iobuf->ptr; +- vec.iov_len = local->total_size; +- local->op_ret = local->total_size; +- SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, +- &local->prebuf, local->iobref, NULL); +- return 0; +- } +- } ++ frame->local = local; ++ local->block_size = priv->block_size; + +- if (local->call_count) { +- shard_common_lookup_shards(frame, this, local->resolver_base_inode, +- shard_post_lookup_shards_readv_handler); +- } else { +- shard_readv_do(frame, this); +- } ++ if (!__is_gsyncd_on_shard_dir(frame, loc)) { ++ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); ++ } + +- return 0; +-} ++ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, ++ xdata); ++ return 0; ++err: ++ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); ++ return 0; ++} ++ ++int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { ++ /* To-Do: Handle open with O_TRUNC under locks */ ++ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); ++ return 0; ++} ++ ++int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, ++ fd_t *fd, dict_t *xdata) { ++ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); ++ return 0; ++} ++ ++int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t 
op_ret, int32_t op_errno, struct iovec *vector, ++ int32_t count, struct iatt *stbuf, struct iobref *iobref, ++ dict_t *xdata) { ++ int i = 0; ++ int call_count = 0; ++ void *address = NULL; ++ uint64_t block_num = 0; ++ off_t off = 0; ++ struct iovec vec = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ fd_t *anon_fd = cookie; ++ shard_inode_ctx_t *ctx = NULL; ++ ++ local = frame->local; ++ ++ /* If shard has already seen a failure here before, there is no point ++ * in aggregating subsequent reads, so just go to out. ++ */ ++ if (local->op_ret < 0) ++ goto out; ++ ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto out; ++ } ++ ++ if (local->op_ret >= 0) ++ local->op_ret += op_ret; + +-int +-shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) +-{ +- int ret = 0; +- struct iobuf *iobuf = NULL; +- shard_local_t *local = NULL; +- shard_priv_t *priv = NULL; ++ shard_inode_ctx_get(anon_fd->inode, this, &ctx); ++ block_num = ctx->block_num; ++ ++ if (block_num == local->first_block) { ++ address = local->iobuf->ptr; ++ } else { ++ /* else ++ * address to start writing to = beginning of buffer + ++ * number of bytes until end of first block + ++ * + block_size times number of blocks ++ * between the current block and the first ++ */ ++ address = (char *)local->iobuf->ptr + ++ (local->block_size - (local->offset % local->block_size)) + ++ ((block_num - local->first_block - 1) * local->block_size); ++ } + +- priv = this->private; +- local = frame->local; ++ for (i = 0; i < count; i++) { ++ address = (char *)address + off; ++ memcpy(address, vector[i].iov_base, vector[i].iov_len); ++ off += vector[i].iov_len; ++ } + ++out: ++ if (anon_fd) ++ fd_unref(anon_fd); ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { +- shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, +- local->op_errno); +- return 0; ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ } else { ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, ++ &local->prebuf, local->iobref, local->xattr_rsp); ++ return 0; ++ } ++ } ++ ++ return 0; ++} ++ ++int shard_readv_do(call_frame_t *frame, xlator_t *this) { ++ int i = 0; ++ int call_count = 0; ++ int last_block = 0; ++ int cur_block = 0; ++ off_t orig_offset = 0; ++ off_t shard_offset = 0; ++ size_t read_size = 0; ++ size_t remaining_size = 0; ++ fd_t *fd = NULL; ++ fd_t *anon_fd = NULL; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ ++ local = frame->local; ++ fd = local->fd; ++ ++ orig_offset = local->offset; ++ cur_block = local->first_block; ++ last_block = local->last_block; ++ remaining_size = local->total_size; ++ local->call_count = call_count = local->num_blocks; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ if (fd->flags & O_DIRECT) ++ local->flags = O_DIRECT; ++ ++ while (cur_block <= last_block) { ++ if (wind_failed) { ++ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, ++ NULL, NULL, NULL); ++ goto next; ++ } ++ ++ shard_offset = orig_offset % local->block_size; ++ read_size = local->block_size - shard_offset; ++ if (read_size > remaining_size) ++ read_size = remaining_size; ++ ++ remaining_size -= read_size; ++ ++ if (cur_block == 0) { ++ anon_fd = 
fd_ref(fd); ++ } else { ++ anon_fd = fd_anonymous(local->inode_list[i]); ++ if (!anon_fd) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, ++ 0, NULL, NULL, NULL); ++ goto next; ++ } + } + +- if (local->offset >= local->prebuf.ia_size) { +- /* If the read is being performed past the end of the file, +- * unwind the FOP with 0 bytes read as status. +- */ +- struct iovec vec = { +- 0, +- }; +- +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); +- if (!iobuf) +- goto err; +- +- vec.iov_base = iobuf->ptr; +- vec.iov_len = 0; +- local->iobref = iobref_new(); +- iobref_add(local->iobref, iobuf); +- iobuf_unref(iobuf); +- +- SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, +- local->iobref, NULL); +- return 0; +- } ++ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->readv, anon_fd, read_size, ++ shard_offset, local->flags, local->xattr_req); ++ ++ orig_offset += read_size; ++ next: ++ cur_block++; ++ i++; ++ call_count--; ++ } ++ return 0; ++} + +- local->first_block = get_lowest_block(local->offset, local->block_size); ++int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) { ++ int shard_block_num = (long)cookie; ++ int call_count = 0; ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (op_ret < 0) { ++ if (op_errno == EEXIST) { ++ LOCK(&frame->lock); ++ { local->eexist_count++; } ++ UNLOCK(&frame->lock); ++ } else { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ } ++ gf_msg_debug(this->name, 0, "mknod of shard %d " ++ "failed: %s", ++ shard_block_num, strerror(op_errno)); ++ goto done; ++ } + +- local->total_size = local->req_size; ++ shard_link_block_inode(local, shard_block_num, inode, buf); + +- local->last_block = get_highest_block(local->offset, local->total_size, +- local->block_size); ++done: ++ call_count = shard_call_count_return(frame); ++ if (call_count == 0) { ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ local->create_count = 0; ++ local->post_mknod_handler(frame, this); ++ } ++ ++ return 0; ++} ++ ++int shard_common_resume_mknod( ++ call_frame_t *frame, xlator_t *this, ++ shard_post_mknod_fop_handler_t post_mknod_handler) { ++ int i = 0; ++ int shard_idx_iter = 0; ++ int last_block = 0; ++ int ret = 0; ++ int call_count = 0; ++ char path[PATH_MAX] = { ++ 0, ++ }; ++ mode_t mode = 0; ++ char *bname = NULL; ++ shard_priv_t *priv = NULL; ++ shard_inode_ctx_t ctx_tmp = { ++ 0, ++ }; ++ shard_local_t *local = NULL; ++ gf_boolean_t wind_failed = _gf_false; ++ fd_t *fd = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ dict_t *xattr_req = NULL; ++ ++ local = frame->local; ++ priv = this->private; ++ fd = local->fd; ++ shard_idx_iter = local->first_block; ++ last_block = local->last_block; ++ call_count = local->call_count = local->create_count; ++ local->post_mknod_handler = post_mknod_handler; ++ ++ SHARD_SET_ROOT_FS_ID(frame, local); ++ ++ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get inode " ++ "ctx for %s", ++ uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + +- local->num_blocks = 
local->last_block - local->first_block + 1; +- local->resolver_base_inode = local->loc.inode; ++ while (shard_idx_iter <= last_block) { ++ if (local->inode_list[i]) { ++ shard_idx_iter++; ++ i++; ++ continue; ++ } + +- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), +- gf_shard_mt_inode_list); +- if (!local->inode_list) +- goto err; ++ if (wind_failed) { ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; ++ } + +- iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); +- if (!iobuf) +- goto err; ++ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, ++ sizeof(path)); + +- local->iobref = iobref_new(); +- if (!local->iobref) { +- iobuf_unref(iobuf); +- goto err; ++ xattr_req = shard_create_gfid_dict(local->xattr_req); ++ if (!xattr_req) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- if (iobref_add(local->iobref, iobuf) != 0) { +- iobuf_unref(iobuf); +- goto err; ++ bname = strrchr(path, '/') + 1; ++ loc.inode = inode_new(this->itable); ++ loc.parent = inode_ref(priv->dot_shard_inode); ++ ret = inode_path(loc.parent, bname, (char **)&(loc.path)); ++ if (ret < 0 || !(loc.inode)) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, ++ "Inode path failed" ++ "on %s, base file gfid = %s", ++ bname, uuid_utoa(fd->inode->gfid)); ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ wind_failed = _gf_true; ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, ++ ENOMEM, NULL, NULL, NULL, NULL, NULL); ++ goto next; + } + +- memset(iobuf->ptr, 0, local->total_size); +- iobuf_unref(iobuf); +- local->iobuf = iobuf; ++ loc.name = strrchr(loc.path, '/'); ++ if (loc.name) ++ loc.name++; + +- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); +- if (!local->dot_shard_loc.inode) { +- ret = shard_init_internal_dir_loc(this, local, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- if (ret) +- goto err; +- shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, +- SHARD_INTERNAL_DIR_DOT_SHARD); +- } else { +- local->post_res_handler = shard_post_resolve_readv_handler; +- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); +- } +- return 0; ++ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, ++ (void *)(long)shard_idx_iter, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->mknod, &loc, mode, ++ ctx_tmp.stat.ia_rdev, 0, xattr_req); ++ loc_wipe(&loc); ++ dict_unref(xattr_req); ++ ++ next: ++ shard_idx_iter++; ++ i++; ++ if (!--call_count) ++ break; ++ } ++ ++ return 0; + err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); +- return 0; ++ /* ++ * This block is for handling failure in shard_inode_ctx_get_all(). ++ * Failures in the while-loop are handled within the loop. 
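++ * (Reading of the control flow: mid-loop failures set wind_failed and
++ * feed -1/ENOMEM straight into shard_common_mknod_cbk(), so call_count
++ * still drains to zero there; only this pre-loop failure needs to
++ * invoke post_mknod_handler() by hand.)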
++ */ ++ SHARD_UNSET_ROOT_FS_ID(frame, local); ++ post_mknod_handler(frame, this); ++ return 0; + } + +-int +-shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, +- off_t offset, uint32_t flags, dict_t *xdata) +-{ +- int ret = 0; +- uint64_t block_size = 0; +- shard_local_t *local = NULL; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); + +- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, +- "Failed to get block " +- "size for %s from its inode ctx", +- uuid_utoa(fd->inode->gfid)); +- goto err; +- } ++int shard_post_lookup_shards_readv_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +- /* block_size = 0 means that the file was created before +- * sharding was enabled on the volume. +- */ +- STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, +- xdata); +- return 0; +- } ++ local = frame->local; + +- if (!this->itable) +- this->itable = fd->inode->table; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- local = mem_get0(this->local_pool); +- if (!local) +- goto err; ++ if (local->create_count) { ++ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } + +- frame->local = local; ++ return 0; ++} + +- ret = syncbarrier_init(&local->barrier); +- if (ret) +- goto err; +- local->fd = fd_ref(fd); +- local->block_size = block_size; +- local->offset = offset; +- local->req_size = size; +- local->flags = flags; +- local->fop = GF_FOP_READ; +- local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); +- if (!local->xattr_req) +- goto err; ++int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local->loc.inode = inode_ref(fd->inode); +- gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ local = frame->local; + +- shard_lookup_base_file(frame, this, &local->loc, +- shard_post_lookup_readv_handler); +- return 0; +-err: +- shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); + return 0; ++ } ++ ++ if (!local->eexist_count) { ++ shard_readv_do(frame, this); ++ } else { ++ local->call_count = local->eexist_count; ++ shard_common_lookup_shards(frame, this, local->loc.inode, ++ shard_post_lookup_shards_readv_handler); ++ } ++ return 0; + } + +-int +-shard_common_inode_write_post_update_size_handler(call_frame_t *frame, +- xlator_t *this) +-{ +- shard_local_t *local = NULL; ++int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { ++ shard_local_t *local = NULL; + +- local = frame->local; ++ local = frame->local; + +- if (local->op_ret < 0) { +- shard_common_failure_unwind(local->fop, frame, local->op_ret, +- local->op_errno); ++ if (local->op_ret < 0) { ++ if (local->op_errno != ENOENT) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } else { +- shard_common_inode_write_success_unwind(local->fop, frame, +- local->written_size); ++ struct iovec vec = { ++ 0, ++ }; ++ ++ vec.iov_base = local->iobuf->ptr; ++ vec.iov_len = local->total_size; ++ local->op_ret = local->total_size; ++ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, ++ &local->prebuf, local->iobref, NULL); ++ return 0; + } +- return 0; +-} ++ } + +-static gf_boolean_t +-shard_is_appending_write(shard_local_t *local) +-{ +- if (local->fop != GF_FOP_WRITE) +- return _gf_false; +- if (local->flags & O_APPEND) +- return _gf_true; +- if (local->fd->flags & O_APPEND) +- return _gf_true; +- return _gf_false; ++ if (local->call_count) { ++ shard_common_lookup_shards(frame, this, local->resolver_base_inode, ++ shard_post_lookup_shards_readv_handler); ++ } else { ++ shard_readv_do(frame, this); ++ } ++ ++ return 0; + } + +-int +-__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; ++int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { ++ int ret = 0; ++ struct iobuf *iobuf = NULL; ++ shard_local_t *local = NULL; ++ shard_priv_t *priv = NULL; ++ ++ priv = this->private; ++ local = frame->local; + +- ret = __inode_ctx_get(inode, this, &ctx_uint); +- if (ret < 0) +- return ret; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ if (local->offset >= local->prebuf.ia_size) { ++ /* If the read is being performed past the end of the file, ++ * unwind the FOP with 0 bytes read as status. 
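++ * (Example, numbers hypothetical: ia_size = 100 and offset >= 100
++ * unwinds with op_ret = 0, mirroring read(2) at EOF; an iobuf is
++ * still allocated so the unwind carries a valid zero-length vector.)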
++ */ ++ struct iovec vec = { ++ 0, ++ }; + +- if (shard_is_appending_write(local)) { +- local->delta_size = local->total_size; +- } else if (local->offset + local->total_size > ctx->stat.ia_size) { +- local->delta_size = (local->offset + local->total_size) - +- ctx->stat.ia_size; +- } else { +- local->delta_size = 0; +- } +- ctx->stat.ia_size += (local->delta_size); +- local->postbuf = ctx->stat; ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); ++ if (!iobuf) ++ goto err; ++ ++ vec.iov_base = iobuf->ptr; ++ vec.iov_len = 0; ++ local->iobref = iobref_new(); ++ iobref_add(local->iobref, iobuf); ++ iobuf_unref(iobuf); + ++ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, ++ local->iobref, NULL); + return 0; +-} ++ } + +-int +-shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, +- xlator_t *this) +-{ +- int ret = -1; ++ local->first_block = get_lowest_block(local->offset, local->block_size); + +- LOCK(&inode->lock); +- { +- ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); +- } +- UNLOCK(&inode->lock); ++ local->total_size = local->req_size; + +- return ret; +-} ++ local->last_block = ++ get_highest_block(local->offset, local->total_size, local->block_size); + +-int +-shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, +- xlator_t *this, int32_t op_ret, +- int32_t op_errno, struct iatt *pre, +- struct iatt *post, dict_t *xdata) +-{ +- int call_count = 0; +- fd_t *anon_fd = cookie; +- shard_local_t *local = NULL; +- glusterfs_fop_t fop = 0; ++ local->num_blocks = local->last_block - local->first_block + 1; ++ local->resolver_base_inode = local->loc.inode; + +- local = frame->local; +- fop = local->fop; ++ local->inode_list = ++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); ++ if (!local->inode_list) ++ goto err; + +- LOCK(&frame->lock); +- { +- if (op_ret < 0) { +- local->op_ret = op_ret; +- local->op_errno = op_errno; +- } else { +- local->written_size += op_ret; +- GF_ATOMIC_ADD(local->delta_blocks, +- post->ia_blocks - pre->ia_blocks); +- local->delta_size += (post->ia_size - pre->ia_size); +- shard_inode_ctx_set(local->fd->inode, this, post, 0, +- SHARD_MASK_TIMES); +- if (local->fd->inode != anon_fd->inode) +- shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, +- anon_fd->inode); +- } +- } +- UNLOCK(&frame->lock); ++ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); ++ if (!iobuf) ++ goto err; + +- if (anon_fd) +- fd_unref(anon_fd); ++ local->iobref = iobref_new(); ++ if (!local->iobref) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- call_count = shard_call_count_return(frame); +- if (call_count == 0) { +- SHARD_UNSET_ROOT_FS_ID(frame, local); +- if (local->op_ret < 0) { +- shard_common_failure_unwind(fop, frame, local->op_ret, +- local->op_errno); +- } else { +- shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); +- local->hole_size = 0; +- if (xdata) +- local->xattr_rsp = dict_ref(xdata); +- shard_update_file_size( +- frame, this, local->fd, NULL, +- shard_common_inode_write_post_update_size_handler); +- } +- } ++ if (iobref_add(local->iobref, iobuf) != 0) { ++ iobuf_unref(iobuf); ++ goto err; ++ } + +- return 0; ++ memset(iobuf->ptr, 0, local->total_size); ++ iobuf_unref(iobuf); ++ local->iobuf = iobuf; ++ ++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); ++ if (!local->dot_shard_loc.inode) { ++ ret = ++ shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); ++ if (ret) ++ goto err; ++ 
shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD);
++ } else {
++ local->post_res_handler = shard_post_resolve_readv_handler;
++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++ }
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++ return 0;
++}
++
++int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, uint32_t flags, dict_t *xdata) {
++ int ret = 0;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size for %s from its inode ctx",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ /* block_size = 0 means that the file was created before
++ * sharding was enabled on the volume.
++ */
++ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
++ return 0;
++ }
++
++ if (!this->itable)
++ this->itable = fd->inode->table;
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
++
++ frame->local = local;
++
++ ret = syncbarrier_init(&local->barrier);
++ if (ret)
++ goto err;
++ local->fd = fd_ref(fd);
++ local->block_size = block_size;
++ local->offset = offset;
++ local->req_size = size;
++ local->flags = flags;
++ local->fop = GF_FOP_READ;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_readv_handler);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- struct iovec *vec, int count, off_t shard_offset,
+- size_t size)
+-{
+- shard_local_t *local = NULL;
++int shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- switch (local->fop) {
+- case GF_FOP_WRITE:
+- STACK_WIND_COOKIE(
+- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset,
+- local->flags, local->iobref, local->xattr_req);
+- break;
+- case GF_FOP_FALLOCATE:
+- STACK_WIND_COOKIE(
+- frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fallocate, fd, local->flags,
+- shard_offset, size, local->xattr_req);
+- break;
+- case GF_FOP_ZEROFILL:
+- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+- FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->zerofill, fd,
+- shard_offset, size, local->xattr_req);
+- break;
+- case GF_FOP_DISCARD:
+- STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+- FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->discard, fd,
+- shard_offset, size, local->xattr_req);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "Invalid fop id = %d", local->fop);
+- break;
+- }
+- return 0;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ } else {
++ shard_common_inode_write_success_unwind(local->fop, frame,
++ local->written_size);
++ }
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
+-{
+- int i = 0;
+- int count = 0;
+- int call_count = 0;
+- int last_block = 0;
+- uint32_t cur_block = 0;
+- fd_t *fd = NULL;
+- fd_t *anon_fd = NULL;
+- shard_local_t *local = NULL;
+- struct iovec *vec = NULL;
+- gf_boolean_t wind_failed = _gf_false;
+- gf_boolean_t odirect = _gf_false;
+- off_t orig_offset = 0;
+- off_t shard_offset = 0;
+- off_t vec_offset = 0;
+- size_t remaining_size = 0;
+- size_t shard_write_size = 0;
+-
+- local = frame->local;
+- fd = local->fd;
+-
+- orig_offset = local->offset;
+- remaining_size = local->total_size;
+- cur_block = local->first_block;
+- local->call_count = call_count = local->num_blocks;
+- last_block = local->last_block;
+-
+- SHARD_SET_ROOT_FS_ID(frame, local);
+-
+- if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC
+- " into "
+- "dict: %s",
+- uuid_utoa(fd->inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- local->call_count = 1;
+- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- return 0;
+- }
++static gf_boolean_t shard_is_appending_write(shard_local_t *local) {
++ if (local->fop != GF_FOP_WRITE)
++ return _gf_false;
++ if (local->flags & O_APPEND)
++ return _gf_true;
++ if (local->fd->flags & O_APPEND)
++ return _gf_true;
++ return _gf_false;
++}
+
+- if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+- odirect = _gf_true;
++int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- while (cur_block <= last_block) {
+- if (wind_failed) {
+- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- shard_offset = orig_offset % local->block_size;
+- shard_write_size = local->block_size - shard_offset;
+- if (shard_write_size > remaining_size)
+- shard_write_size = remaining_size;
+-
+- remaining_size -= shard_write_size;
+-
+- if (local->fop == GF_FOP_WRITE) {
+- count = iov_subset(local->vector, local->count, vec_offset,
+- vec_offset + shard_write_size, NULL);
+-
+- vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
+- if (!vec) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- GF_FREE(vec);
+- shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
+- -1, ENOMEM, NULL, NULL, NULL);
+- goto next;
+- }
+- count = iov_subset(local->vector, local->count, vec_offset,
+- vec_offset + shard_write_size, vec);
+- }
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- if (cur_block == 0) {
+- anon_fd = fd_ref(fd);
+- } else {
+- anon_fd = fd_anonymous(local->inode_list[i]);
+- if (!anon_fd) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- wind_failed = _gf_true;
+- GF_FREE(vec);
+- shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd,
+- this, -1, ENOMEM, NULL, NULL,
+- NULL);
+- goto next;
+- }
+-
+- if (local->fop == GF_FOP_WRITE) {
+- if (odirect)
+- local->flags = O_DIRECT;
+- else
+- local->flags = GF_ANON_FD_FLAGS;
+- }
+- }
++ if (shard_is_appending_write(local)) {
++ local->delta_size = local->total_size;
++ } else if (local->offset + local->total_size > ctx->stat.ia_size) {
++ local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size;
++ } else {
++ local->delta_size = 0;
++ }
++ ctx->stat.ia_size += (local->delta_size);
++ local->postbuf = ctx->stat;
+
+- shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
+- shard_offset, shard_write_size);
+- if (vec)
+- vec_offset += shard_write_size;
+- orig_offset += shard_write_size;
+- GF_FREE(vec);
+- vec = NULL;
+- next:
+- cur_block++;
+- i++;
+- call_count--;
+- }
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
+- xlator_t *this);
++int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = -1;
++
++ LOCK(&inode->lock);
++ { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); }
++ UNLOCK(&inode->lock);
+
+-int
+-shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++ return ret;
++}
+
+- local = frame->local;
++int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, struct iatt *pre,
++ struct iatt *post, dict_t *xdata) {
++ int call_count = 0;
++ fd_t *anon_fd = cookie;
++ shard_local_t *local = NULL;
++ glusterfs_fop_t fop = 0;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++ local = frame->local;
++ fop = local->fop;
+
+- if (local->create_count) {
+- shard_common_resume_mknod(frame, this,
+- shard_common_inode_write_post_mknod_handler);
++ LOCK(&frame->lock);
++ {
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
+ } else {
+- shard_common_inode_write_do(frame, this);
++ local->written_size += op_ret;
++ GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks);
++ local->delta_size += (post->ia_size - pre->ia_size);
++ shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES);
++ if (local->fd->inode != anon_fd->inode)
++ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
++ anon_fd->inode);
++ }
++ }
++ UNLOCK(&frame->lock);
++
++ if (anon_fd)
++ fd_unref(anon_fd);
++
++ call_count = shard_call_count_return(frame);
++ if (call_count == 0) {
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno);
++ } else {
++ shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
++ local->hole_size = 0;
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++ shard_update_file_size(frame, this, local->fd, NULL,
++ shard_common_inode_write_post_update_size_handler);
+ }
++ }
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ struct iovec *vec, int count,
++ off_t shard_offset, size_t size) {
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++ switch (local->fop) {
++ case GF_FOP_WRITE:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd,
++ vec, count, shard_offset, local->flags, local->iobref,
++ local->xattr_req);
++ break;
++ case GF_FOP_FALLOCATE:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd,
++ local->flags, shard_offset, size, local->xattr_req);
++ break;
++ case GF_FOP_ZEROFILL:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd,
++ shard_offset, size, local->xattr_req);
++ break;
++ case GF_FOP_DISCARD:
++ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd,
++ shard_offset, size, local->xattr_req);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "Invalid fop id = %d", local->fop);
++ break;
++ }
++ return 0;
++}
++
++int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) {
++ int i = 0;
++ int count = 0;
++ int call_count = 0;
++ int last_block = 0;
++ uint32_t cur_block = 0;
++ fd_t *fd = NULL;
++ fd_t *anon_fd = NULL;
++ shard_local_t *local = NULL;
++ struct iovec *vec = NULL;
++ gf_boolean_t wind_failed = _gf_false;
++ gf_boolean_t odirect = _gf_false;
++ off_t orig_offset = 0;
++ off_t shard_offset = 0;
++ off_t vec_offset = 0;
++ size_t remaining_size = 0;
++ size_t shard_write_size = 0;
++
++ local = frame->local;
++ fd = local->fd;
++
++ orig_offset = local->offset;
++ remaining_size = local->total_size;
++ cur_block = local->first_block;
++ local->call_count = call_count = local->num_blocks;
++ last_block = local->last_block;
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
++ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into "
++ "dict: %s",
++ uuid_utoa(fd->inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ local->call_count = 1;
++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ return 0;
++ }
+
+- if (!local->eexist_count) {
+- shard_common_inode_write_do(frame, this);
+- } else {
+- local->call_count = local->eexist_count;
+- shard_common_lookup_shards(
+- frame, this, local->loc.inode,
+- shard_common_inode_write_post_lookup_shards_handler);
++ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
++ odirect = _gf_true;
++
++ while (cur_block <= last_block) {
++ if (wind_failed) {
++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ goto next;
+ }
+
+- return 0;
+-}
++ shard_offset = orig_offset % local->block_size;
++ shard_write_size = local->block_size - shard_offset;
++ if (shard_write_size > remaining_size)
++ shard_write_size = remaining_size;
+
+-int
+-shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = NULL;
++ remaining_size -= shard_write_size;
+
+- local = frame->local;
++ if (local->fop == GF_FOP_WRITE) {
++ count = iov_subset(local->vector, local->count, vec_offset,
++ vec_offset + shard_write_size, NULL);
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
++ vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec);
++ if (!vec) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ GF_FREE(vec);
++ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
++ ENOMEM, NULL, NULL, NULL);
++ goto next;
++ }
++ count = iov_subset(local->vector, local->count, vec_offset,
++ vec_offset + shard_write_size, vec);
+ }
+
+- if (local->call_count) {
+- shard_common_lookup_shards(
+- frame, this, local->resolver_base_inode,
+- shard_common_inode_write_post_lookup_shards_handler);
++ if (cur_block == 0) {
++ anon_fd = fd_ref(fd);
+ } else {
+- shard_common_inode_write_do(frame, this);
+- }
++ anon_fd = fd_anonymous(local->inode_list[i]);
++ if (!anon_fd) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ wind_failed = _gf_true;
++ GF_FREE(vec);
++ shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1,
++ ENOMEM, NULL, NULL, NULL);
++ goto next;
++ }
+
+- return 0;
++ if (local->fop == GF_FOP_WRITE) {
++ if (odirect)
++ local->flags = O_DIRECT;
++ else
++ local->flags = GF_ANON_FD_FLAGS;
++ }
++ }
++
++ shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
++ shard_offset, shard_write_size);
++ if (vec)
++ vec_offset += shard_write_size;
++ orig_offset += shard_write_size;
++ GF_FREE(vec);
++ vec = NULL;
++ next:
++ cur_block++;
++ i++;
++ call_count--;
++ }
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+- xlator_t *this)
+-{
+- shard_local_t *local = frame->local;
+- shard_priv_t *priv = this->private;
+-
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(local->fop, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
+-
+- local->postbuf = local->prebuf;
+-
+- /*Adjust offset to EOF so that correct shard is chosen for append*/
+- if (shard_is_appending_write(local))
+- local->offset = local->prebuf.ia_size;
+-
+- local->first_block = get_lowest_block(local->offset, local->block_size);
+- local->last_block = get_highest_block(local->offset, local->total_size,
+- local->block_size);
+- local->num_blocks = local->last_block - local->first_block + 1;
+- local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+- gf_shard_mt_inode_list);
+- if (!local->inode_list) {
+- shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+- return 0;
+- }
++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++ xlator_t *this);
+
+- gf_msg_trace(this->name, 0,
+- "%s: gfid=%s first_block=%" PRIu32
+- " "
+- "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64
+- " total_size=%zu flags=%" PRId32 "",
+- gf_fop_list[local->fop],
+- uuid_utoa(local->resolver_base_inode->gfid),
+- local->first_block, local->last_block, local->num_blocks,
+- local->offset, local->total_size, local->flags);
++int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
++ local = frame->local;
+
+- if (!local->dot_shard_loc.inode) {
+- /*change handler*/
+- shard_mkdir_internal_dir(frame, this,
+- shard_common_inode_write_post_resolve_handler,
+- SHARD_INTERNAL_DIR_DOT_SHARD);
+- } else {
+- /*change handler*/
+- local->post_res_handler = shard_common_inode_write_post_resolve_handler;
+- shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+- }
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
+-}
+-
+-int
+-shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, inode_t *inode,
+- struct iatt *buf, struct iatt *preparent,
+- struct iatt *postparent, dict_t *xdata)
+-{
+- inode_t *link_inode = NULL;
+- shard_local_t *local = NULL;
+- shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++ }
+
+- local = frame->local;
++ if (local->create_count) {
++ shard_common_resume_mknod(frame, this,
++ shard_common_inode_write_post_mknod_handler);
++ } else {
++ shard_common_inode_write_do(frame, this);
++ }
+
+- SHARD_UNSET_ROOT_FS_ID(frame, local);
++ return 0;
++}
+
+- if (op_ret == -1) {
+- if (op_errno != EEXIST) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- } else {
+- gf_msg_debug(this->name, 0,
+- "mkdir on %s failed "
+- "with EEXIST. Attempting lookup now",
+- shard_internal_dir_string(type));
+- shard_lookup_internal_dir(frame, this, local->post_res_handler,
+- type);
+- return 0;
+- }
+- }
++int shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+- if (link_inode != inode) {
+- shard_refresh_internal_dir(frame, this, type);
+- } else {
+- shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- }
+- return 0;
+-unwind:
+- shard_common_resolve_shards(frame, this, local->post_res_handler);
+- return 0;
+-}
++ local = frame->local;
+
+-int
+-shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+- shard_post_resolve_fop_handler_t handler,
+- shard_internal_dir_type_t type)
+-{
+- int ret = -1;
+- shard_local_t *local = NULL;
+- shard_priv_t *priv = NULL;
+- dict_t *xattr_req = NULL;
+- uuid_t *gfid = NULL;
+- loc_t *loc = NULL;
+- gf_boolean_t free_gfid = _gf_true;
+-
+- local = frame->local;
+- priv = this->private;
+-
+- local->post_res_handler = handler;
+- gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+- if (!gfid)
+- goto err;
+-
+- switch (type) {
+- case SHARD_INTERNAL_DIR_DOT_SHARD:
+- gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+- loc = &local->dot_shard_loc;
+- break;
+- case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+- gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+- loc = &local->dot_shard_rm_loc;
+- break;
+- default:
+- bzero(*gfid, sizeof(uuid_t));
+- break;
+- }
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
+
+- xattr_req = dict_new();
+- if (!xattr_req)
+- goto err;
++ if (!local->eexist_count) {
++ shard_common_inode_write_do(frame, this);
++ } else {
++ local->call_count = local->eexist_count;
++ shard_common_lookup_shards(
++ frame, this, local->loc.inode,
++ shard_common_inode_write_post_lookup_shards_handler);
++ }
+
+- ret = shard_init_internal_dir_loc(this, local, type);
+- if (ret)
+- goto err;
++ return 0;
++}
+
+- ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+- "Failed to set gfid-req for %s",
+- shard_internal_dir_string(type));
+- goto err;
+- } else {
+- free_gfid = _gf_false;
+- }
++int shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = NULL;
+
+- SHARD_SET_ROOT_FS_ID(frame, local);
++ local = frame->local;
+
+- STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+- 0755, 0, xattr_req);
+- dict_unref(xattr_req);
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
++ }
+
+-err:
+- if (xattr_req)
+- dict_unref(xattr_req);
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- if (free_gfid)
+- GF_FREE(gfid);
+- handler(frame, this);
+- return 0;
+-}
++ if (local->call_count) {
++ shard_common_lookup_shards(
++ frame, this, local->resolver_base_inode,
++ shard_common_inode_write_post_lookup_shards_handler);
++ } else {
++ shard_common_inode_write_do(frame, this);
++ }
+
+-int
+-shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+-{
+- /* To-Do: Wind flush on all shards of the file */
+- SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+-{
+- STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->flush, fd, xdata);
++int shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
++ xlator_t *this) {
++ shard_local_t *local = frame->local;
++ shard_priv_t *priv = this->private;
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(local->fop, frame, local->op_ret,
++ local->op_errno);
+ return 0;
+-}
++ }
+
+-int
+-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+- xlator_t *this)
+-{
+- int ret = -1;
+- uint64_t ctx_uint = 0;
+- shard_inode_ctx_t *ctx = NULL;
++ local->postbuf = local->prebuf;
+
+- ret = __inode_ctx_get(inode, this, &ctx_uint);
+- if (ret < 0)
+- return ret;
++ /*Adjust offset to EOF so that correct shard is chosen for append*/
++ if (shard_is_appending_write(local))
++ local->offset = local->prebuf.ia_size;
+
+- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
++ local->first_block = get_lowest_block(local->offset, local->block_size);
++ local->last_block =
++ get_highest_block(local->offset, local->total_size, local->block_size);
++ local->num_blocks = local->last_block - local->first_block + 1;
++ local->inode_list =
++ GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list);
++ if (!local->inode_list) {
++ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
++ return 0;
++ }
+
+- local->postbuf.ia_ctime = ctx->stat.ia_ctime;
+- local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
+- local->postbuf.ia_atime = ctx->stat.ia_atime;
+- local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
+- local->postbuf.ia_mtime = ctx->stat.ia_mtime;
+- local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
++ gf_msg_trace(
++ this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " "
++ "last_block=%" PRIu32 " num_blocks=%" PRIu32
++ " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "",
++ gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid),
++ local->first_block, local->last_block, local->num_blocks, local->offset,
++ local->total_size, local->flags);
+
+- return 0;
+-}
++ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+
+-int
+-shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+- xlator_t *this)
+-{
+- int ret = 0;
++ if (!local->dot_shard_loc.inode) {
++ /*change handler*/
++ shard_mkdir_internal_dir(frame, this,
++ shard_common_inode_write_post_resolve_handler,
++ SHARD_INTERNAL_DIR_DOT_SHARD);
++ } else {
++ /*change handler*/
++ local->post_res_handler = shard_common_inode_write_post_resolve_handler;
++ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
++ }
++ return 0;
++}
+
+- LOCK(&inode->lock);
+- {
+- ret = __shard_get_timestamps_from_inode_ctx(local, inode, this);
+- }
+- UNLOCK(&inode->lock);
++int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno, inode_t *inode,
++ struct iatt *buf, struct iatt *preparent,
++ struct iatt *postparent, dict_t *xdata) {
++ inode_t *link_inode = NULL;
++ shard_local_t *local = NULL;
++ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
++
++ local = frame->local;
++
++ SHARD_UNSET_ROOT_FS_ID(frame, local);
++
++ if (op_ret == -1) {
++ if (op_errno != EEXIST) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ } else {
++ gf_msg_debug(this->name, 0, "mkdir on %s failed "
++ "with EEXIST. Attempting lookup now",
++ shard_internal_dir_string(type));
++ shard_lookup_internal_dir(frame, this, local->post_res_handler, type);
++ return 0;
++ }
++ }
++
++ link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
++ if (link_inode != inode) {
++ shard_refresh_internal_dir(frame, this, type);
++ } else {
++ shard_inode_ctx_mark_dir_refreshed(link_inode, this);
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ }
++ return 0;
++unwind:
++ shard_common_resolve_shards(frame, this, local->post_res_handler);
++ return 0;
++}
++
++int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
++ shard_post_resolve_fop_handler_t handler,
++ shard_internal_dir_type_t type) {
++ int ret = -1;
++ shard_local_t *local = NULL;
++ shard_priv_t *priv = NULL;
++ dict_t *xattr_req = NULL;
++ uuid_t *gfid = NULL;
++ loc_t *loc = NULL;
++ gf_boolean_t free_gfid = _gf_true;
++
++ local = frame->local;
++ priv = this->private;
++
++ local->post_res_handler = handler;
++ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
++ if (!gfid)
++ goto err;
++
++ switch (type) {
++ case SHARD_INTERNAL_DIR_DOT_SHARD:
++ gf_uuid_copy(*gfid, priv->dot_shard_gfid);
++ loc = &local->dot_shard_loc;
++ break;
++ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
++ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
++ loc = &local->dot_shard_rm_loc;
++ break;
++ default:
++ bzero(*gfid, sizeof(uuid_t));
++ break;
++ }
++
++ xattr_req = dict_new();
++ if (!xattr_req)
++ goto err;
++
++ ret = shard_init_internal_dir_loc(this, local, type);
++ if (ret)
++ goto err;
++
++ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
++ "Failed to set gfid-req for %s", shard_internal_dir_string(type));
++ goto err;
++ } else {
++ free_gfid = _gf_false;
++ }
++
++ SHARD_SET_ROOT_FS_ID(frame, local);
++
++ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
++ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
++ 0755, 0, xattr_req);
++ dict_unref(xattr_req);
++ return 0;
+
+- return ret;
++err:
++ if (xattr_req)
++ dict_unref(xattr_req);
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ if (free_gfid)
++ GF_FREE(gfid);
++ handler(frame, this);
++ return 0;
+ }
+
+-int
+-shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+- struct iatt *postbuf, dict_t *xdata)
+-{
+- int call_count = 0;
+- uint64_t fsync_count = 0;
+- fd_t *anon_fd = cookie;
+- shard_local_t *local = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_inode_ctx_t *base_ictx = NULL;
+- inode_t *base_inode = NULL;
+- gf_boolean_t unref_shard_inode = _gf_false;
++int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *xdata) {
++ /* To-Do: Wind flush on all shards of the file */
++ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
++ return 0;
++}
+
+- local = frame->local;
+- base_inode = local->fd->inode;
++int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) {
++ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->flush, fd, xdata);
++ return 0;
++}
+
+- if (local->op_ret < 0)
+- goto out;
++int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = -1;
++ uint64_t ctx_uint = 0;
++ shard_inode_ctx_t *ctx = NULL;
+
+- LOCK(&frame->lock);
+- {
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- UNLOCK(&frame->lock);
+- goto out;
+- }
+- shard_inode_ctx_set(local->fd->inode, this, postbuf, 0,
+- SHARD_MASK_TIMES);
+- }
+- UNLOCK(&frame->lock);
+- fd_ctx_get(anon_fd, this, &fsync_count);
+-out:
+- if (anon_fd && (base_inode != anon_fd->inode)) {
+- LOCK(&base_inode->lock);
+- LOCK(&anon_fd->inode->lock);
+- {
+- __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+- __shard_inode_ctx_get(base_inode, this, &base_ictx);
+- if (op_ret == 0)
+- ctx->fsync_needed -= fsync_count;
+- GF_ASSERT(ctx->fsync_needed >= 0);
+- if (ctx->fsync_needed != 0) {
+- list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
+- base_ictx->fsync_count++;
+- } else {
+- unref_shard_inode = _gf_true;
+- }
+- }
+- UNLOCK(&anon_fd->inode->lock);
+- UNLOCK(&base_inode->lock);
+- }
++ ret = __inode_ctx_get(inode, this, &ctx_uint);
++ if (ret < 0)
++ return ret;
+
+- if (unref_shard_inode)
+- inode_unref(anon_fd->inode);
+- if (anon_fd)
+- fd_unref(anon_fd);
++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+- call_count = shard_call_count_return(frame);
+- if (call_count != 0)
+- return 0;
++ local->postbuf.ia_ctime = ctx->stat.ia_ctime;
++ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
++ local->postbuf.ia_atime = ctx->stat.ia_atime;
++ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
++ local->postbuf.ia_mtime = ctx->stat.ia_mtime;
++ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+- local->op_errno);
+- } else {
+- shard_get_timestamps_from_inode_ctx(local, base_inode, this);
+- SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, local->xattr_rsp);
+- }
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this)
+-{
+- int ret = 0;
+- int call_count = 0;
+- int fsync_count = 0;
+- fd_t *anon_fd = NULL;
+- inode_t *base_inode = NULL;
+- shard_local_t *local = NULL;
+- shard_inode_ctx_t *ctx = NULL;
+- shard_inode_ctx_t *iter = NULL;
+- struct list_head copy = {
+- 0,
+- };
+- shard_inode_ctx_t *tmp = NULL;
++int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
++ xlator_t *this) {
++ int ret = 0;
+
+- local = frame->local;
+- base_inode = local->fd->inode;
+- local->postbuf = local->prebuf;
+- INIT_LIST_HEAD(&copy);
++ LOCK(&inode->lock);
++ { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); }
++ UNLOCK(&inode->lock);
+
+- if (local->op_ret < 0) {
+- shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+- local->op_errno);
+- return 0;
+- }
++ return ret;
++}
+
++int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ struct iatt *prebuf, struct iatt *postbuf,
++ dict_t *xdata) {
++ int call_count = 0;
++ uint64_t fsync_count = 0;
++ fd_t *anon_fd = cookie;
++ shard_local_t *local = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_inode_ctx_t *base_ictx = NULL;
++ inode_t *base_inode = NULL;
++ gf_boolean_t unref_shard_inode = _gf_false;
++
++ local = frame->local;
++ base_inode = local->fd->inode;
++
++ if (local->op_ret < 0)
++ goto out;
++
++ LOCK(&frame->lock);
++ {
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ UNLOCK(&frame->lock);
++ goto out;
++ }
++ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES);
++ }
++ UNLOCK(&frame->lock);
++ fd_ctx_get(anon_fd, this, &fsync_count);
++out:
++ if (anon_fd && (base_inode != anon_fd->inode)) {
+ LOCK(&base_inode->lock);
++ LOCK(&anon_fd->inode->lock);
+ {
+- __shard_inode_ctx_get(base_inode, this, &ctx);
+- list_splice_init(&ctx->to_fsync_list, &copy);
+- call_count = ctx->fsync_count;
+- ctx->fsync_count = 0;
+- }
++ __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
++ __shard_inode_ctx_get(base_inode, this, &base_ictx);
++ if (op_ret == 0)
++ ctx->fsync_needed -= fsync_count;
++ GF_ASSERT(ctx->fsync_needed >= 0);
++ if (ctx->fsync_needed != 0) {
++ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
++ base_ictx->fsync_count++;
++ } else {
++ unref_shard_inode = _gf_true;
++ }
++ }
++ UNLOCK(&anon_fd->inode->lock);
+ UNLOCK(&base_inode->lock);
++ }
++
++ if (unref_shard_inode)
++ inode_unref(anon_fd->inode);
++ if (anon_fd)
++ fd_unref(anon_fd);
++
++ call_count = shard_call_count_return(frame);
++ if (call_count != 0)
++ return 0;
+
+- local->call_count = ++call_count;
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++ local->op_errno);
++ } else {
++ shard_get_timestamps_from_inode_ctx(local, base_inode, this);
++ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, local->xattr_rsp);
++ }
++ return 0;
++}
++
++int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) {
++ int ret = 0;
++ int call_count = 0;
++ int fsync_count = 0;
++ fd_t *anon_fd = NULL;
++ inode_t *base_inode = NULL;
++ shard_local_t *local = NULL;
++ shard_inode_ctx_t *ctx = NULL;
++ shard_inode_ctx_t *iter = NULL;
++ struct list_head copy = {
++ 0,
++ };
++ shard_inode_ctx_t *tmp = NULL;
++
++ local = frame->local;
++ base_inode = local->fd->inode;
++ local->postbuf = local->prebuf;
++ INIT_LIST_HEAD(&copy);
++
++ if (local->op_ret < 0) {
++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
++ local->op_errno);
++ return 0;
++ }
++
++ LOCK(&base_inode->lock);
++ {
++ __shard_inode_ctx_get(base_inode, this, &ctx);
++ list_splice_init(&ctx->to_fsync_list, &copy);
++ call_count = ctx->fsync_count;
++ ctx->fsync_count = 0;
++ }
++ UNLOCK(&base_inode->lock);
++
++ local->call_count = ++call_count;
++
++ /* Send fsync() on the base shard first */
++ anon_fd = fd_ref(local->fd);
++ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
++ local->xattr_req);
++ call_count--;
++ anon_fd = NULL;
++
++ list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list) {
++ list_del_init(&iter->to_fsync_list);
++ fsync_count = 0;
++ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
++ GF_ASSERT(fsync_count > 0);
++ anon_fd = fd_anonymous(iter->inode);
++ if (!anon_fd) {
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
++ "Failed to create "
++ "anon fd to fsync shard");
++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ continue;
++ }
+
+- /* Send fsync() on the base shard first */
+- anon_fd = fd_ref(local->fd);
++ ret = fd_ctx_set(anon_fd, this, fsync_count);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
++ "Failed to set fd "
++ "ctx for shard inode gfid=%s",
++ uuid_utoa(iter->inode->gfid));
++ local->op_ret = -1;
++ local->op_errno = ENOMEM;
++ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM,
++ NULL, NULL, NULL);
++ continue;
++ }
+ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
+ local->xattr_req);
+ call_count--;
+- anon_fd = NULL;
+-
+- list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list)
+- {
+- list_del_init(&iter->to_fsync_list);
+- fsync_count = 0;
+- shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
+- GF_ASSERT(fsync_count > 0);
+- anon_fd = fd_anonymous(iter->inode);
+- if (!anon_fd) {
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+- SHARD_MSG_MEMALLOC_FAILED,
+- "Failed to create "
+- "anon fd to fsync shard");
+- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- continue;
+- }
+-
+- ret = fd_ctx_set(anon_fd, this, fsync_count);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
+- "Failed to set fd "
+- "ctx for shard inode gfid=%s",
+- uuid_utoa(iter->inode->gfid));
+- local->op_ret = -1;
+- local->op_errno = ENOMEM;
+- shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+- ENOMEM, NULL, NULL, NULL);
+- continue;
+- }
+- STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+- anon_fd, local->datasync, local->xattr_req);
+- call_count--;
+- }
++ }
+
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+- dict_t *xdata)
+-{
+- int ret = 0;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
++ dict_t *xdata) {
++ int ret = 0;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size for %s from its inode ctx",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size for %s from its inode ctx",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+- return 0;
+- }
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
++ return 0;
++ }
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (!this->itable)
++ this->itable = fd->inode->table;
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- frame->local = local;
++ frame->local = local;
+
+- local->fd = fd_ref(fd);
+- local->fop = GF_FOP_FSYNC;
+- local->datasync = datasync;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
++ local->fd = fd_ref(fd);
++ local->fop = GF_FOP_FSYNC;
++ local->datasync = datasync;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
+
+- local->loc.inode = inode_ref(fd->inode);
+- gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ local->loc.inode = inode_ref(fd->inode);
++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_post_lookup_fsync_handler);
+- return 0;
++ shard_lookup_base_file(frame, this, &local->loc,
++ shard_post_lookup_fsync_handler);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
+- xlator_t *this, int32_t op_ret,
+- int32_t op_errno, gf_dirent_t *orig_entries,
+- dict_t *xdata)
+-{
+- gf_dirent_t *entry = NULL;
+- gf_dirent_t *tmp = NULL;
+- shard_local_t *local = NULL;
++int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
++ xlator_t *this, int32_t op_ret,
++ int32_t op_errno,
++ gf_dirent_t *orig_entries, dict_t *xdata) {
++ gf_dirent_t *entry = NULL;
++ gf_dirent_t *tmp = NULL;
++ shard_local_t *local = NULL;
+
+- local = frame->local;
++ local = frame->local;
+
+- if (op_ret < 0)
+- goto unwind;
++ if (op_ret < 0)
++ goto unwind;
+
+- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+- {
+- list_del_init(&entry->list);
+- list_add_tail(&entry->list, &local->entries_head.list);
++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
++ list_del_init(&entry->list);
++ list_add_tail(&entry->list, &local->entries_head.list);
+
+- if (!entry->dict)
+- continue;
++ if (!entry->dict)
++ continue;
+
+- if (IA_ISDIR(entry->d_stat.ia_type))
+- continue;
++ if (IA_ISDIR(entry->d_stat.ia_type))
++ continue;
+
+- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
+- shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+- if (!entry->inode)
+- continue;
++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++ if (!entry->inode)
++ continue;
+
+- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+- }
+- local->op_ret += op_ret;
++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++ }
++ local->op_ret += op_ret;
+
+ unwind:
+- if (local->fop == GF_FOP_READDIR)
+- SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
+- &local->entries_head, xdata);
+- else
+- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+- &local->entries_head, xdata);
+- return 0;
++ if (local->fop == GF_FOP_READDIR)
++ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
++ &local->entries_head, xdata);
++ else
++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
++ xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
+- dict_t *xdata)
+-{
+- fd_t *fd = NULL;
+- gf_dirent_t *entry = NULL;
+- gf_dirent_t *tmp = NULL;
+- shard_local_t *local = NULL;
+- gf_boolean_t last_entry = _gf_false;
++int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ gf_dirent_t *orig_entries, dict_t *xdata) {
++ fd_t *fd = NULL;
++ gf_dirent_t *entry = NULL;
++ gf_dirent_t *tmp = NULL;
++ shard_local_t *local = NULL;
++ gf_boolean_t last_entry = _gf_false;
+
+- local = frame->local;
+- fd = local->fd;
++ local = frame->local;
++ fd = local->fd;
+
+- if (op_ret < 0)
+- goto unwind;
++ if (op_ret < 0)
++ goto unwind;
+
+- list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+- {
+- if (last_entry)
+- last_entry = _gf_false;
+-
+- if (__is_root_gfid(fd->inode->gfid) &&
+- !(strcmp(entry->d_name, GF_SHARD_DIR))) {
+- local->offset = entry->d_off;
+- op_ret--;
+- last_entry = _gf_true;
+- continue;
+- }
++ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) {
++ if (last_entry)
++ last_entry = _gf_false;
+
+- list_del_init(&entry->list);
+- list_add_tail(&entry->list, &local->entries_head.list);
++ if (__is_root_gfid(fd->inode->gfid) &&
++ !(strcmp(entry->d_name, GF_SHARD_DIR))) {
++ local->offset = entry->d_off;
++ op_ret--;
++ last_entry = _gf_true;
++ continue;
++ }
+
+- if (!entry->dict)
+- continue;
++ list_del_init(&entry->list);
++ list_add_tail(&entry->list, &local->entries_head.list);
+
+- if (IA_ISDIR(entry->d_stat.ia_type))
+- continue;
++ if (!entry->dict)
++ continue;
+
+- if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
+- frame->root->pid != GF_CLIENT_PID_GSYNCD)
+- shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
++ if (IA_ISDIR(entry->d_stat.ia_type))
++ continue;
+
+- if (!entry->inode)
+- continue;
++ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
++ frame->root->pid != GF_CLIENT_PID_GSYNCD)
++ shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+
+- shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+- }
++ if (!entry->inode)
++ continue;
+
+- local->op_ret = op_ret;
++ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
++ }
+
+- if (last_entry) {
+- if (local->fop == GF_FOP_READDIR)
+- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+- local->fd, local->readdir_size, local->offset,
+- local->xattr_req);
+- else
+- STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+- local->fd, local->readdir_size, local->offset,
+- local->xattr_req);
+- return 0;
+- }
++ local->op_ret = op_ret;
+
+-unwind:
++ if (last_entry) {
+ if (local->fop == GF_FOP_READDIR)
+- SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno,
+- &local->entries_head, xdata);
++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdir, local->fd,
++ local->readdir_size, local->offset, local->xattr_req);
+ else
+- SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+- &local->entries_head, xdata);
++ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdirp, local->fd,
++ local->readdir_size, local->offset, local->xattr_req);
+ return 0;
+-}
++ }
+
+-int
+-shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, int whichop, dict_t *xdata)
+-{
+- int ret = 0;
+- shard_local_t *local = NULL;
+-
+- local = mem_get0(this->local_pool);
+- if (!local) {
+- goto err;
++unwind:
++ if (local->fop == GF_FOP_READDIR)
++ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head,
++ xdata);
++ else
++ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head,
++ xdata);
++ return 0;
++}
++
++int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
++ off_t offset, int whichop, dict_t *xdata) {
++ int ret = 0;
++ shard_local_t *local = NULL;
++
++ local = mem_get0(this->local_pool);
++ if (!local) {
++ goto err;
++ }
++
++ frame->local = local;
++
++ local->fd = fd_ref(fd);
++ local->fop = whichop;
++ local->readdir_size = size;
++ INIT_LIST_HEAD(&local->entries_head.list);
++ local->list_inited = _gf_true;
++
++ if (whichop == GF_FOP_READDIR) {
++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
++ } else {
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++ local, err);
++ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
++ if (ret) {
++ gf_log(this->name, GF_LOG_WARNING,
++ "Failed to set "
++ "dict value: key:%s, directory gfid=%s",
++ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
++ goto err;
+ }
+
+- frame->local = local;
+-
+- local->fd = fd_ref(fd);
+- local->fop = whichop;
+- local->readdir_size = size;
+- INIT_LIST_HEAD(&local->entries_head.list);
+- local->list_inited = _gf_true;
+-
+- if (whichop == GF_FOP_READDIR) {
+- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+- } else {
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+- local, err);
+- ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+- if (ret) {
+- gf_log(this->name, GF_LOG_WARNING,
+- "Failed to set "
+- "dict value: key:%s, directory gfid=%s",
+- GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
+-
+- STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+- local->xattr_req);
+- }
++ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
++ local->xattr_req);
++ }
+
+- return 0;
++ return 0;
+
+ err:
+- STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+- return 0;
++ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
++ return 0;
+ }
+
+-int32_t
+-shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, dict_t *xdata)
+-{
+- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+- return 0;
++int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ size_t size, off_t offset, dict_t *xdata) {
++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+- off_t offset, dict_t *xdata)
+-{
+- shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
+- return 0;
++int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ size_t size, off_t offset, dict_t *xdata) {
++ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- const char *name, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
++ }
+
+- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- const char *name, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out);
++ }
+
+- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
+-{
+- if (op_ret < 0)
+- goto unwind;
++int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata) {
++ if (op_ret < 0)
++ goto unwind;
+
+- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+ unwind:
+- SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+- return 0;
++ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+- dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+- (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
+- op_errno = ENODATA;
+- goto out;
+- }
++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
++ op_errno = ENODATA;
++ goto out;
++ }
+
+- STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+- return 0;
++ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, dict_t *dict,
+- dict_t *xdata)
+-{
+- if (op_ret < 0)
+- goto unwind;
++int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno, dict_t *dict,
++ dict_t *xdata) {
++ if (op_ret < 0)
++ goto unwind;
+
+- if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+- dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+- dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+- }
++ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
++ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
++ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
++ }
+
+ unwind:
+- SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+- return 0;
++ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
++ return 0;
+ }
+
+-int32_t
+-shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- const char *name, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ const char *name, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+- (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
+- op_errno = ENODATA;
+- goto out;
+- }
++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
++ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
++ op_errno = ENODATA;
++ goto out;
++ }
+
+- STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+- return 0;
++ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+- int32_t flags, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
++ fd, dict, flags, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int32_t
+-shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+- int32_t flags, dict_t *xdata)
+-{
+- int op_errno = EINVAL;
++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ dict_t *dict, int32_t flags, dict_t *xdata) {
++ int op_errno = EINVAL;
+
+- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
+- }
++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out);
++ }
+
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+- loc, dict, flags, xdata);
+- return 0;
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
++ loc, dict, flags, xdata);
++ return 0;
+ out:
+- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno);
++ return 0;
+ }
+
+-int
+-shard_post_setattr_handler(call_frame_t *frame, xlator_t *this)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (local->fop == GF_FOP_SETATTR) {
+- if (local->op_ret >= 0)
+- shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
+- SHARD_LOOKUP_MASK);
+- SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, local->xattr_rsp);
+- } else if (local->fop == GF_FOP_FSETATTR) {
+- if (local->op_ret >= 0)
+- shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
+- SHARD_LOOKUP_MASK);
+- SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
+- &local->prebuf, &local->postbuf, local->xattr_rsp);
+- }
+-
+- return 0;
+-}
++int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) {
++ shard_local_t *local = NULL;
+
+-int
+-shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+- struct iatt *postbuf, dict_t *xdata)
+-{
+- shard_local_t *local = NULL;
+-
+- local = frame->local;
+-
+- if (op_ret < 0) {
+- local->op_ret = op_ret;
+- local->op_errno = op_errno;
+- goto unwind;
+- }
++ local = frame->local;
+
+- local->prebuf = *prebuf;
+- if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+- local->op_ret = -1;
+- local->op_errno = EINVAL;
+- goto unwind;
+- }
+- if (xdata)
+- local->xattr_rsp = dict_ref(xdata);
+- local->postbuf = *postbuf;
+- local->postbuf.ia_size = local->prebuf.ia_size;
+- local->postbuf.ia_blocks = local->prebuf.ia_blocks;
++ if (local->fop == GF_FOP_SETATTR) {
++ if (local->op_ret >= 0)
++ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
++ SHARD_LOOKUP_MASK);
++ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, local->xattr_rsp);
++ } else if (local->fop == GF_FOP_FSETATTR) {
++ if (local->op_ret >= 0)
++ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
++ SHARD_LOOKUP_MASK);
++ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
++ &local->prebuf, &local->postbuf, local->xattr_rsp);
++ }
+
+-unwind:
+- local->handler(frame, this);
+- return 0;
++ return 0;
+ }
+
+-int
+-shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+- return 0;
+- }
+-
+- ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block size from inode ctx of %s",
+- uuid_utoa(loc->inode->gfid));
+- goto err;
+- }
+-
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+- return 0;
+- }
+-
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
++ int32_t op_ret, int32_t op_errno,
++ struct iatt *prebuf, struct iatt *postbuf,
++ dict_t *xdata) {
++ shard_local_t *local = NULL;
+
+- frame->local = local;
++ local = frame->local;
+
+- local->handler = shard_post_setattr_handler;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+- local->fop = GF_FOP_SETATTR;
+- loc_copy(&local->loc, loc);
++ if (op_ret < 0) {
++ local->op_ret = op_ret;
++ local->op_errno = op_errno;
++ goto unwind;
++ }
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+- local, err);
++ local->prebuf = *prebuf;
++ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
++ local->op_ret = -1;
++ local->op_errno = EINVAL;
++ goto unwind;
++ }
++ if (xdata)
++ local->xattr_rsp = dict_ref(xdata);
++ local->postbuf = *postbuf;
++ local->postbuf.ia_size = local->prebuf.ia_size;
++ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+
+- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+- local->xattr_req);
+- return 0;
+-err:
+- shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
+- return 0;
++unwind:
++ local->handler(frame, this);
++ return 0;
+ }
+
+-int
+-shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+-{
+- int ret = -1;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
+-
+- if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+- return 0;
+- }
++int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
++ struct iatt *stbuf, int32_t valid, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block size from inode ctx of %s",
+- uuid_utoa(fd->inode->gfid));
+- goto err;
+- }
++ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++ return 0;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+- return 0;
+- }
++ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block size from inode ctx of %s",
++ uuid_utoa(loc->inode->gfid));
++ goto err;
++ }
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
++ return 0;
++ }
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto err;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- frame->local = local;
++ frame->local = local;
+
+- local->handler = shard_post_setattr_handler;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto err;
+- local->fop = GF_FOP_FSETATTR;
+- local->fd = fd_ref(fd);
++ local->handler = shard_post_setattr_handler;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++ local->fop = GF_FOP_SETATTR;
++ loc_copy(&local->loc, loc);
+
+- SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+- local, err);
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
++ local, err);
+
+- STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+- local->xattr_req);
+- return 0;
++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
++ local->xattr_req);
++ return 0;
+ err:
+- shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
+- return 0;
++ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
++ return 0;
+ }
+
+-int
+-shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+- glusterfs_fop_t fop, fd_t *fd,
+- struct iovec *vector, int32_t count,
+- off_t offset, uint32_t flags, size_t len,
+- struct iobref *iobref, dict_t *xdata)
+-{
+- int ret = 0;
+- int i = 0;
+- uint64_t block_size = 0;
+- shard_local_t *local = NULL;
++int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
++ struct iatt *stbuf, int32_t valid, dict_t *xdata) {
++ int ret = -1;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
+
+- ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+- if (ret) {
+- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+- "Failed to get block "
+- "size for %s from its inode ctx",
+- uuid_utoa(fd->inode->gfid));
+- goto out;
+- }
++ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++ return 0;
++ }
+
+- if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+- /* block_size = 0 means that the file was created before
+- * sharding was enabled on the volume.
+- */
+- switch (fop) {
+- case GF_FOP_WRITE:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->writev, fd, vector,
+- count, offset, flags, iobref, xdata);
+- break;
+- case GF_FOP_FALLOCATE:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->fallocate, fd, flags,
+- offset, len, xdata);
+- break;
+- case GF_FOP_ZEROFILL:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->zerofill, fd, offset,
+- len, xdata);
+- break;
+- case GF_FOP_DISCARD:
+- STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+- FIRST_CHILD(this)->fops->discard, fd, offset,
+- len, xdata);
+- break;
+- default:
+- gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+- "Invalid fop id = %d", fop);
+- break;
+- }
+- return 0;
+- }
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block size from inode ctx of %s",
++ uuid_utoa(fd->inode->gfid));
++ goto err;
++ }
+
+- if (!this->itable)
+- this->itable = fd->inode->table;
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
++ return 0;
++ }
+
+- local = mem_get0(this->local_pool);
+- if (!local)
+- goto out;
++ if (!this->itable)
++ this->itable = fd->inode->table;
+
+- frame->local = local;
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto err;
+
+- ret = syncbarrier_init(&local->barrier);
+- if (ret)
+- goto out;
+- local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+- if (!local->xattr_req)
+- goto out;
+-
+- if (vector) {
+- local->vector = iov_dup(vector, count);
+- if (!local->vector)
+- goto out;
+- for (i = 0; i < count; i++)
+- local->total_size += vector[i].iov_len;
+- local->count = count;
+- } else {
+- local->total_size = len;
+- }
++ frame->local = local;
+
+- local->fop = fop;
+- local->offset = offset;
+- local->flags = flags;
+- if (iobref)
+- local->iobref = iobref_ref(iobref);
+- local->fd = fd_ref(fd);
+- local->block_size = block_size;
+- local->resolver_base_inode = local->fd->inode;
+- GF_ATOMIC_INIT(local->delta_blocks, 0);
++ local->handler = shard_post_setattr_handler;
++ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
++ if (!local->xattr_req)
++ goto err;
++ local->fop = GF_FOP_FSETATTR;
++ local->fd = fd_ref(fd);
+
+- local->loc.inode = inode_ref(fd->inode);
+- gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
++ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
++ local, err);
+
+- shard_lookup_base_file(frame, this, &local->loc,
+- shard_common_inode_write_post_lookup_handler);
+- return 0;
++ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
++ local->xattr_req);
++ return 0;
++err:
++ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
++ return 0;
++}
++
++int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
++ glusterfs_fop_t fop, fd_t *fd,
++ struct iovec *vector, int32_t count,
++ off_t offset, uint32_t flags, size_t len,
++ struct iobref *iobref, dict_t *xdata) {
++ int ret = 0;
++ int i = 0;
++ uint64_t block_size = 0;
++ shard_local_t *local = NULL;
++
++ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
++ if (ret) {
++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
++ "Failed to get block "
++ "size for %s from its inode ctx",
++ uuid_utoa(fd->inode->gfid));
++ goto out;
++ }
++
++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
++ /* block_size = 0 means that the file was created before
++ * sharding was enabled on the volume.
++ */
++ switch (fop) {
++ case GF_FOP_WRITE:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
++ fd, vector, count, offset, flags, iobref, xdata);
++ break;
++ case GF_FOP_FALLOCATE:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->fallocate, fd, flags, offset,
++ len, xdata);
++ break;
++ case GF_FOP_ZEROFILL:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
++ xdata);
++ break;
++ case GF_FOP_DISCARD:
++ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
++ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
++ break;
++ default:
++ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
++ "Invalid fop id = %d", fop);
++ break;
++ }
++ return 0;
++ }
++
++ if (!this->itable)
++ this->itable = fd->inode->table;
++
++ local = mem_get0(this->local_pool);
++ if (!local)
++ goto out;
++
++ frame->local = local;
++
++ ret = syncbarrier_init(&local->barrier);
++ if (ret)
++ goto out;
++ local->xattr_req = (xdata) ?
dict_ref(xdata) : dict_new(); ++ if (!local->xattr_req) ++ goto out; ++ ++ if (vector) { ++ local->vector = iov_dup(vector, count); ++ if (!local->vector) ++ goto out; ++ for (i = 0; i < count; i++) ++ local->total_size += vector[i].iov_len; ++ local->count = count; ++ } else { ++ local->total_size = len; ++ } ++ ++ local->fop = fop; ++ local->offset = offset; ++ local->flags = flags; ++ if (iobref) ++ local->iobref = iobref_ref(iobref); ++ local->fd = fd_ref(fd); ++ local->block_size = block_size; ++ local->resolver_base_inode = local->fd->inode; ++ GF_ATOMIC_INIT(local->delta_blocks, 0); ++ ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_common_inode_write_post_lookup_handler); ++ return 0; + out: +- shard_common_failure_unwind(fop, frame, -1, ENOMEM); +- return 0; ++ shard_common_failure_unwind(fop, frame, -1, ENOMEM); ++ return 0; + } + +-int +-shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, +- struct iovec *vector, int32_t count, off_t offset, uint32_t flags, +- struct iobref *iobref, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, +- offset, flags, 0, iobref, xdata); +- return 0; ++int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ struct iovec *vector, int32_t count, off_t offset, ++ uint32_t flags, struct iobref *iobref, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, ++ offset, flags, 0, iobref, xdata); ++ return 0; + } + +-int +-shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, +- int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +-{ +- if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && +- (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) +- goto out; ++int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ int32_t keep_size, off_t offset, size_t len, ++ dict_t *xdata) { ++ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && ++ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) ++ goto out; + +- shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, +- offset, keep_size, len, NULL, xdata); +- return 0; ++ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, ++ offset, keep_size, len, NULL, xdata); ++ return 0; + out: +- shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); +- return 0; ++ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); ++ return 0; + } + +-int +-shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- off_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ off_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; + } + +-int +-shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- size_t len, dict_t *xdata) +-{ +- shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, +- offset, 0, len, NULL, xdata); +- return 0; ++int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ size_t len, dict_t *xdata) { ++ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, ++ offset, 0, len, NULL, xdata); ++ return 0; 
+ } + +-int32_t +-shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, +- gf_seek_what_t what, dict_t *xdata) +-{ +- /* TBD */ +- gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, +- "seek called on %s.", uuid_utoa(fd->inode->gfid)); +- shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); +- return 0; ++int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ gf_seek_what_t what, dict_t *xdata) { ++ /* TBD */ ++ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, ++ "seek called on %s.", uuid_utoa(fd->inode->gfid)); ++ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); ++ return 0; + } + +-int32_t +-mem_acct_init(xlator_t *this) +-{ +- int ret = -1; +- +- if (!this) +- return ret; ++int32_t mem_acct_init(xlator_t *this) { ++ int ret = -1; + +- ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); ++ if (!this) ++ return ret; + +- if (ret != 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, +- "Memory accounting init" +- "failed"); +- return ret; +- } ++ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, ++ "Memory accounting init" ++ "failed"); + return ret; ++ } ++ ++ return ret; + } + +-int +-init(xlator_t *this) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int init(xlator_t *this) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- if (!this) { +- gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, +- "this is NULL. init() failed"); +- return -1; +- } ++ if (!this) { ++ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, ++ "this is NULL. init() failed"); ++ return -1; ++ } + +- if (!this->parents) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "Dangling volume. Check volfile"); +- goto out; +- } ++ if (!this->parents) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "Dangling volume. Check volfile"); ++ goto out; ++ } + +- if (!this->children || this->children->next) { +- gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, +- "shard not configured with exactly one sub-volume. " +- "Check volfile"); +- goto out; +- } ++ if (!this->children || this->children->next) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, ++ "shard not configured with exactly one sub-volume. 
" ++ "Check volfile"); ++ goto out; ++ } + +- priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); +- if (!priv) +- goto out; ++ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); ++ if (!priv) ++ goto out; + +- GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); ++ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + +- GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); ++ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + +- GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); ++ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + +- this->local_pool = mem_pool_new(shard_local_t, 128); +- if (!this->local_pool) { +- ret = -1; +- goto out; +- } +- gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); +- gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); ++ this->local_pool = mem_pool_new(shard_local_t, 128); ++ if (!this->local_pool) { ++ ret = -1; ++ goto out; ++ } ++ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); ++ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + +- this->private = priv; +- LOCK_INIT(&priv->lock); +- INIT_LIST_HEAD(&priv->ilist_head); +- ret = 0; ++ this->private = priv; ++ LOCK_INIT(&priv->lock); ++ INIT_LIST_HEAD(&priv->ilist_head); ++ ret = 0; + out: +- if (ret) { +- GF_FREE(priv); +- mem_pool_destroy(this->local_pool); +- } ++ if (ret) { ++ GF_FREE(priv); ++ mem_pool_destroy(this->local_pool); ++ } + +- return ret; ++ return ret; + } + +-void +-fini(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; ++void fini(xlator_t *this) { ++ shard_priv_t *priv = NULL; + +- GF_VALIDATE_OR_GOTO("shard", this, out); ++ GF_VALIDATE_OR_GOTO("shard", this, out); + +- mem_pool_destroy(this->local_pool); +- this->local_pool = NULL; ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; + +- priv = this->private; +- if (!priv) +- goto out; ++ priv = this->private; ++ if (!priv) ++ goto out; + +- this->private = NULL; +- LOCK_DESTROY(&priv->lock); +- GF_FREE(priv); ++ this->private = NULL; ++ LOCK_DESTROY(&priv->lock); ++ GF_FREE(priv); + + out: +- return; ++ return; + } + +-int +-reconfigure(xlator_t *this, dict_t *options) +-{ +- int ret = -1; +- shard_priv_t *priv = NULL; ++int reconfigure(xlator_t *this, dict_t *options) { ++ int ret = -1; ++ shard_priv_t *priv = NULL; + +- priv = this->private; ++ priv = this->private; + +- GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); ++ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + +- GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, +- uint32, out); +- ret = 0; ++ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, ++ out); ++ ret = 0; + + out: +- return ret; ++ return ret; + } + +-int +-shard_forget(xlator_t *this, inode_t *inode) +-{ +- uint64_t ctx_uint = 0; +- shard_inode_ctx_t *ctx = NULL; +- shard_priv_t *priv = NULL; ++int shard_forget(xlator_t *this, inode_t *inode) { ++ uint64_t ctx_uint = 0; ++ shard_inode_ctx_t *ctx = NULL; ++ shard_priv_t *priv = NULL; + +- priv = this->private; +- if (!priv) +- return 0; ++ priv = this->private; ++ if (!priv) ++ return 0; + +- inode_ctx_del(inode, this, &ctx_uint); +- if (!ctx_uint) +- return 0; ++ inode_ctx_del(inode, this, &ctx_uint); ++ if (!ctx_uint) ++ return 0; + +- ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; ++ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + +- /* When LRU limit reaches inode 
will be forcefully removed from the +- * table, inode needs to be removed from LRU of shard as well. +- */ +- if (!list_empty(&ctx->ilist)) { +- LOCK(&priv->lock); +- { +- list_del_init(&ctx->ilist); +- priv->inode_count--; +- } +- UNLOCK(&priv->lock); ++ /* When LRU limit reaches inode will be forcefully removed from the ++ * table, inode needs to be removed from LRU of shard as well. ++ */ ++ if (!list_empty(&ctx->ilist)) { ++ LOCK(&priv->lock); ++ { ++ list_del_init(&ctx->ilist); ++ priv->inode_count--; + } +- GF_FREE(ctx); ++ UNLOCK(&priv->lock); ++ } ++ GF_FREE(ctx); + +- return 0; ++ return 0; + } + +-int +-shard_release(xlator_t *this, fd_t *fd) +-{ +- /* TBD */ +- return 0; ++int shard_release(xlator_t *this, fd_t *fd) { ++ /* TBD */ ++ return 0; + } + +-int +-shard_priv_dump(xlator_t *this) +-{ +- shard_priv_t *priv = NULL; +- char key_prefix[GF_DUMP_MAX_BUF_LEN] = { +- 0, +- }; +- char *str = NULL; ++int shard_priv_dump(xlator_t *this) { ++ shard_priv_t *priv = NULL; ++ char key_prefix[GF_DUMP_MAX_BUF_LEN] = { ++ 0, ++ }; ++ char *str = NULL; + +- priv = this->private; ++ priv = this->private; + +- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); +- gf_proc_dump_add_section("%s", key_prefix); +- str = gf_uint64_2human_readable(priv->block_size); +- gf_proc_dump_write("shard-block-size", "%s", str); +- gf_proc_dump_write("inode-count", "%d", priv->inode_count); +- gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); +- gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); ++ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); ++ gf_proc_dump_add_section("%s", key_prefix); ++ str = gf_uint64_2human_readable(priv->block_size); ++ gf_proc_dump_write("shard-block-size", "%s", str); ++ gf_proc_dump_write("inode-count", "%d", priv->inode_count); ++ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); ++ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + +- GF_FREE(str); ++ GF_FREE(str); + +- return 0; ++ return 0; + } + +-int +-shard_releasedir(xlator_t *this, fd_t *fd) +-{ +- return 0; +-} ++int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } + + struct xlator_fops fops = { + .lookup = shard_lookup, +-- +1.8.3.1 + diff --git a/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch new file mode 100644 index 0000000..df971b8 --- /dev/null +++ b/SOURCES/0336-spec-check-and-return-exit-code-in-rpm-scripts.patch @@ -0,0 +1,162 @@ +From 562283ad34021bbf4fc540127ee7072d5152d34d Mon Sep 17 00:00:00 2001 +From: Yuval Turgeman <yturgema@redhat.com> +Date: Wed, 24 Jul 2019 16:42:22 +0300 +Subject: [PATCH 336/336] spec: check and return exit code in rpm scripts + +lua's error() call expects a value as its second argument, and this is +taken from the `val` variable, while the `ok` is boolean. 
This causes +the rpm scripts to fail on: + +bad argument #2 to 'error' (number expected, got boolean) + +Label: DOWNSTREAM ONLY +BUG: 1768786 +Change-Id: I9c6b1f62ebf15dbc93196d018bc1fd628b36fc33 +>Signed-off-by: Yuval Turgeman <yturgema@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/186405 +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 55 +++++++++++++++++++++++++++++++++---------------------- + 1 file changed, 33 insertions(+), 22 deletions(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 91180db..1b975b2 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1572,8 +1572,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1606,8 +1607,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1640,8 +1642,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1674,8 +1677,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1707,8 +1711,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1740,8 +1745,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1775,8 +1781,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1810,8 +1817,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + +@@ -1845,8 +1853,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1881,8 +1890,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + %endif + +@@ -1916,8 +1926,9 @@ fi + ]] + + ok, how, val = os.execute(script) +-if not (ok == 0) then +- error("Detected running glusterfs processes", ok) ++rc = val or ok ++if not (rc == 0) then ++ error("Detected running glusterfs processes", rc) + end + + %posttrans server +-- 
+1.8.3.1 + diff --git a/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch new file mode 100644 index 0000000..b18ef4f --- /dev/null +++ b/SOURCES/0337-fuse-Set-limit-on-invalidate-queue-size.patch @@ -0,0 +1,455 @@ +From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Fri, 9 Aug 2019 14:34:22 +0530 +Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size + +If the glusterfs fuse client process is unable to +process the invalidate requests quickly enough, the +number of such requests quickly grows large enough +to use a significant amount of memory. +We are now introducing another option to set an upper +limit on these to prevent runaway memory usage. + +> Upstream https://review.gluster.org/23187 +> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788 +> Fixes: bz#1732717 +> Signed-off-by: N Balachandran <nbalacha@redhat.com> + +BUG: 1763208 +Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187529 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + doc/mount.glusterfs.8 | 5 +++ + glusterfsd/src/glusterfsd.c | 21 ++++++++++ + glusterfsd/src/glusterfsd.h | 3 +- + libglusterfs/src/glusterfs/glusterfs.h | 1 + + libglusterfs/src/glusterfs/inode.h | 1 + + libglusterfs/src/inode.c | 31 +++++++++++---- + xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++------- + xlators/mount/fuse/src/fuse-bridge.h | 3 +- + xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++ + 9 files changed, 108 insertions(+), 24 deletions(-) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index 286631b..b35b362 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None] + Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] + .TP + .TP ++\fBinvalidate-limit=\fRN ++Suspend fuse invalidations implied by 'lru-limit' if number of outstanding ++invalidations reaches N ++.TP ++.TP + \fBbackground-qlen=\fRN + Set fuse module's background queue length to N [default: 64] + .TP +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 5b5e996..0856471 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = { + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " + "[default: 131072]"}, ++ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, ++ "Suspend inode invalidations implied by 'lru-limit' if the number of " ++ "outstanding invalidations reaches N"}, + {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, + "Set fuse module's background queue length to N " + "[default: 64]"}, +@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options) + } + } + ++ if (cmd_args->invalidate_limit >= 0) { ++ ret = dict_set_int32(options, "invalidate-limit", ++ cmd_args->invalidate_limit); ++ if (ret < 0) { ++ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, ++ "invalidate-limit"); ++ goto err; ++ } ++ } ++ + if (cmd_args->background_qlen) { + ret = dict_set_int32(options, "background-qlen", + cmd_args->background_qlen); +@@ -1283,6 +1296,14 @@ 
parse_opts(int key, char *arg, struct argp_state *state) + argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); + break; + ++ case ARGP_FUSE_INVALIDATE_LIMIT_KEY: ++ if (!gf_string2int32(arg, &cmd_args->invalidate_limit)) ++ break; ++ ++ argp_failure(state, -1, 0, "unknown invalidate limit option %s", ++ arg); ++ break; ++ + case ARGP_FUSE_BACKGROUND_QLEN_KEY: + if (!gf_string2int(arg, &cmd_args->background_qlen)) + break; +diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h +index fa55789..ee655f0 100644 +--- a/glusterfsd/src/glusterfsd.h ++++ b/glusterfsd/src/glusterfsd.h +@@ -111,7 +111,8 @@ enum argp_option_keys { + ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, + ARGP_FUSE_LRU_LIMIT_KEY = 190, + ARGP_FUSE_AUTO_INVAL_KEY = 191, +- ARGP_BRICK_MUX_KEY = 192 ++ ARGP_BRICK_MUX_KEY = 192, ++ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195, + }; + + struct _gfd_vol_top_priv { +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 79c93ae..3b594c0 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -541,6 +541,7 @@ struct _cmd_args { + int client_pid_set; + unsigned uid_map_root; + int32_t lru_limit; ++ int32_t invalidate_limit; + int background_qlen; + int congestion_threshold; + char *fuse_mountopts; +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 52efdd8..4421c47 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -107,6 +107,7 @@ struct _inode { + struct list_head list; /* active/lru/purge */ + + struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ ++ bool in_invalidate_list; /* Set if inode is in table invalidate list */ + bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ + }; + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 96ddea5..5331e93 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear) + + this = THIS; + +- if (clear && inode->invalidate_sent) { +- inode->invalidate_sent = false; ++ if (clear && inode->in_invalidate_list) { ++ inode->in_invalidate_list = false; + inode->table->invalidate_size--; + __inode_activate(inode); + } +@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear) + inode->_ctx[index].ref--; + } + +- if (!inode->ref && !inode->invalidate_sent) { ++ if (!inode->ref && !inode->in_invalidate_list) { + inode->table->active_size--; + + nlookup = GF_ATOMIC_GET(inode->nlookup); +@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate) + return inode; + + if (!inode->ref) { +- if (inode->invalidate_sent) { +- inode->invalidate_sent = false; ++ if (inode->in_invalidate_list) { ++ inode->in_invalidate_list = false; + inode->table->invalidate_size--; + } else { + inode->table->lru_size--; + } + if (is_invalidate) { +- inode->invalidate_sent = true; ++ inode->in_invalidate_list = true; + inode->table->invalidate_size++; + list_move_tail(&inode->list, &inode->table->invalidate); + } else { +@@ -1609,6 +1609,7 @@ static int + inode_table_prune(inode_table_t *table) + { + int ret = 0; ++ int ret1 = 0; + struct list_head purge = { + 0, + }; +@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table) + /* check for valid inode with 'nlookup' */ + nlookup = GF_ATOMIC_GET(entry->nlookup); + if (nlookup) { ++ if (entry->invalidate_sent) { ++ list_move_tail(&entry->list, &table->lru); ++ continue; ++ } + 
__inode_ref(entry, true); + tmp = entry; + break; +@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table) + if (tmp) { + xlator_t *old_THIS = THIS; + THIS = table->invalidator_xl; +- table->invalidator_fn(table->invalidator_xl, tmp); ++ ret1 = table->invalidator_fn(table->invalidator_xl, tmp); + THIS = old_THIS; +- inode_unref(tmp); ++ pthread_mutex_lock(&table->lock); ++ { ++ if (!ret1) { ++ tmp->invalidate_sent = true; ++ __inode_unref(tmp, false); ++ } else { ++ /* Move this back to the lru list*/ ++ __inode_unref(tmp, true); ++ } ++ } ++ pthread_mutex_unlock(&table->lock); + } + + /* Just so that if purge list is handled too, then clear it off */ +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1c946a2..8b2e7f0 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log; + void + fini(xlator_t *this_xl); + +-static void ++static int32_t + fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + + /* +@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) + #define send_fuse_obj(this, finh, obj) \ + send_fuse_data(this, finh, obj, sizeof(*(obj))) + +-static void ++static int32_t + fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + { + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + + priv = this->private; + if (!priv->reverse_fuse_thread_started) +- return; ++ return -1; ++ ++ if (priv->invalidate_limit && ++ (priv->invalidate_count >= priv->invalidate_limit)) { ++ return -1; ++ } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) +- return; ++ return -1; + + list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) + { + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) +- break; ++ return -1; + + INIT_LIST_HEAD(&node->next); + +@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); ++ priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); + } + + #endif +- return; ++ return 0; + } + + /* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. 
+ */ +-static void ++static int32_t + fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + { + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + priv = this->private; + + if (!priv->reverse_fuse_thread_started) +- return; ++ return -1; ++ ++ if (priv->invalidate_limit && ++ (priv->invalidate_count >= priv->invalidate_limit)) { ++ return -1; ++ } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) +- return; ++ return -1; + + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) +- return; ++ return -1; + + INIT_LIST_HEAD(&node->next); + +@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); ++ priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); +@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "fuse_invalidate_inode not implemented on this system"); + #endif +- return; ++ return 0; + } + + #if FUSE_KERNEL_MINOR_VERSION >= 11 +@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + static int32_t + fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) + { +- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); +- return 0; ++ int32_t ret = 0; ++ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); ++ return ret; + } + #endif + +@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64, + finh->nodeid); + #if FUSE_KERNEL_MINOR_VERSION >= 11 +- fuse_invalidate_entry(this, finh->nodeid); ++ ret = fuse_invalidate_entry(this, finh->nodeid); ++ if (ret) ++ op_errno = EBUSY; + #endif + goto done; + } +@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data) + fuse_invalidate_node_t, next); + + list_del_init(&node->next); ++ priv->invalidate_count--; + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data) + list_del_init(&node->next); + GF_FREE(node); + } ++ priv->invalidate_count = 0; + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this) + (int)private->timed_response_fuse_thread_started); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); ++ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit); ++ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64, ++ private->invalidate_count); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); + + return 0; +@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl) + + GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); + ++ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32, ++ cleanup_exit); ++ + GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); + + GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); +@@ -6955,6 +6978,15 @@ struct volume_options options[] = { + "reaching this limit (0 means 'unlimited')", + }, + { ++ .key = {"invalidate-limit"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = "0", ++ .min = 0, ++ .description = "suspend invalidations as of 'lru-limit' if the number " ++ "of outstanding invalidations reaches this limit " ++ "(0 means 'unlimited')", ++ }, ++ { + .key 
= {"auto-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 697bd88..2311582 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -139,7 +139,7 @@ struct fuse_private { + pthread_cond_t invalidate_cond; + pthread_mutex_t invalidate_mutex; + gf_boolean_t reverse_fuse_thread_started; +- ++ uint64_t invalidate_count; + /* For communicating with separate mount thread. */ + int status_pipe[2]; + +@@ -191,6 +191,7 @@ struct fuse_private { + + /* LRU Limit, if not set, default is 128k for now */ + uint32_t lru_limit; ++ uint32_t invalidate_limit; + }; + typedef struct fuse_private fuse_private_t; + +diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in +index cbde42d..61d7422 100755 +--- a/xlators/mount/fuse/utils/mount.glusterfs.in ++++ b/xlators/mount/fuse/utils/mount.glusterfs.in +@@ -257,6 +257,10 @@ start_glusterfs () + cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); + fi + ++ if [ -n "$invalidate_limit" ]; then ++ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit"); ++ fi ++ + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi +@@ -505,6 +509,9 @@ with_options() + "lru-limit") + lru_limit=$value + ;; ++ "invalidate-limit") ++ invalidate_limit=$value ++ ;; + "background-qlen") + bg_qlen=$value + ;; +-- +1.8.3.1 + diff --git a/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch new file mode 100644 index 0000000..b108bd0 --- /dev/null +++ b/SOURCES/0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch @@ -0,0 +1,83 @@ +From 6d2e12a53ef0bcbeea274c47537a0c707a3f7b1e Mon Sep 17 00:00:00 2001 +From: N Balachandran <nbalacha@redhat.com> +Date: Fri, 20 Sep 2019 13:30:42 +0530 +Subject: [PATCH 338/344] glusterfs/fuse: Reduce the default lru-limit value + +The current lru-limit value still uses memory for +upto 128K inodes. +Reduce the default value of lru-limit to 64K. 
+ +> Upstream https://review.gluster.org/23461 +> Change-Id: Ica2dd4f8f5fde45cb5180d8f02c3d86114ac52b3 +> Fixes: bz#1753880 +> Signed-off-by: N Balachandran <nbalacha@redhat.com> +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1763208 +Change-Id: I04ab39b5278e702aacdceebfa5b63702b9f9703b +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187535 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + doc/mount.glusterfs.8 | 2 +- + glusterfsd/src/glusterfsd.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.c | 2 +- + xlators/mount/fuse/src/fuse-bridge.h | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 +index b35b362..87a5669 100644 +--- a/doc/mount.glusterfs.8 ++++ b/doc/mount.glusterfs.8 +@@ -123,7 +123,7 @@ Provide list of backup volfile servers in the following format [default: None] + .TP + .TP + \fBlru-limit=\fRN +-Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] ++Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536] + .TP + .TP + \fBinvalidate-limit=\fRN +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 0856471..974fb88 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -211,7 +211,7 @@ static struct argp_option gf_options[] = { + "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"}, + {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, + "Set fuse module's limit for number of inodes kept in LRU list to N " +- "[default: 131072]"}, ++ "[default: 65536]"}, + {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, + "Suspend inode invalidations implied by 'lru-limit' if the number of " + "outstanding invalidations reaches N"}, +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 8b2e7f0..ebe5c28 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -6972,7 +6972,7 @@ struct volume_options options[] = { + { + .key = {"lru-limit"}, + .type = GF_OPTION_TYPE_INT, +- .default_value = "131072", ++ .default_value = "65536", + .min = 0, + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index 2311582..cf4479c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -189,7 +189,7 @@ struct fuse_private { + gf_boolean_t flush_handle_interrupt; + gf_boolean_t fuse_auto_inval; + +- /* LRU Limit, if not set, default is 128k for now */ ++ /* LRU Limit, if not set, default is 64k for now */ + uint32_t lru_limit; + uint32_t invalidate_limit; + }; +-- +1.8.3.1 + diff --git a/SOURCES/0339-geo-rep-fix-integer-config-validation.patch b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch new file mode 100644 index 0000000..45f3ede --- /dev/null +++ b/SOURCES/0339-geo-rep-fix-integer-config-validation.patch @@ -0,0 +1,93 @@ +From 8b5b3b247a00515d3188453c27b0ba749e93d325 Mon Sep 17 00:00:00 2001 +From: Aravinda VK <avishwan@redhat.com> +Date: Tue, 26 Mar 2019 13:20:13 +0530 +Subject: [PATCH 339/344] geo-rep: fix integer config validation + +ssh-port validation is mentioned as `validation=int` in template +`gsyncd.conf`, but not handled this during geo-rep config set. 
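+
+The fix validates by attempting the conversion and treating failure as
+invalid, which the patch below implements in Python with int() and
+ValueError. Purely for illustration, the same convert-and-check idea in
+C (an assumed helper, not part of this patch) would be:
+
+    #include <errno.h>
+    #include <stdbool.h>
+    #include <stdlib.h>
+
+    /* Return true iff 'value' parses completely as a base-10 integer. */
+    static bool
+    validate_int(const char *value)
+    {
+        char *end = NULL;
+
+        errno = 0;
+        (void)strtol(value, &end, 10);
+
+        /* Reject empty input, trailing garbage and out-of-range values. */
+        return (end != value) && (*end == '\0') && (errno == 0);
+    }
+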
+ +upstream patch: + https://review.gluster.org/#/c/glusterfs/+/22418/ +Backport of: + + >Fixes: bz#1692666 + >Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6 + >Signed-off-by: Aravinda VK <avishwan@redhat.com> + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1782162 +Change-Id: I3f19d9b471b0a3327e4d094dfbefcc58ed2c34f6 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187533 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/gsyncdconfig.py | 23 ++++++++++++++++++----- + tests/00-geo-rep/georep-basic-dr-rsync.t | 3 +++ + 2 files changed, 21 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py +index f823311..8848071 100644 +--- a/geo-replication/syncdaemon/gsyncdconfig.py ++++ b/geo-replication/syncdaemon/gsyncdconfig.py +@@ -329,6 +329,9 @@ class Gconf(object): + if item["validation"] == "unixtime": + return validate_unixtime(value) + ++ if item["validation"] == "int": ++ return validate_int(value) ++ + return False + + def _is_config_changed(self): +@@ -381,6 +384,14 @@ def config_upgrade(config_file, ret): + config.write(configfile) + + ++def validate_int(value): ++ try: ++ _ = int(value) ++ return True ++ except ValueError: ++ return False ++ ++ + def validate_unixtime(value): + try: + y = datetime.fromtimestamp(int(value)).strftime("%Y") +@@ -393,11 +404,13 @@ def validate_unixtime(value): + + + def validate_minmax(value, minval, maxval): +- value = int(value) +- minval = int(minval) +- maxval = int(maxval) +- +- return value >= minval and value <= maxval ++ try: ++ value = int(value) ++ minval = int(minval) ++ maxval = int(maxval) ++ return value >= minval and value <= maxval ++ except ValueError: ++ return False + + + def validate_choice(value, allowed_values): +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index b432635..b6fbf18 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -71,6 +71,9 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created" + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + ++#Config Set ssh-port to validate int validation ++TEST $GEOREP_CLI $master $slave config ssh-port 22 ++ + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} + +-- +1.8.3.1 + diff --git a/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch new file mode 100644 index 0000000..54b2706 --- /dev/null +++ b/SOURCES/0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch @@ -0,0 +1,46 @@ +From 0c996d6c40c625f8a0ee6be2c220c89aaf70c840 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Tue, 10 Dec 2019 08:35:23 +0530 +Subject: [PATCH 340/344] rpc: event_slot_alloc converted infinite loop after + reach slot_used to 1024 + +Problem: In the commit faf5ac13c4ee00a05e9451bf8da3be2a9043bbf2 missed one + condition to come out from the loop so after reach the slot_used to + 1024 loop has become infinite loop + +Solution: Correct the code path to avoid the infinite loop + +> Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db +> Fixes: bz#1781440 +> Credits: Xavi Hernandez 
<xhernandez@redhat.com> +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry picked from commit 8030f9c0f092170ceb50cedf59b9c330022825b7) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23843/) + +Change-Id: Ia02a109571f0d8cc9902c32db3e9b9282ee5c1db +BUG: 1781444 +Credits: Xavi Hernandez <xhernandez@redhat.com> +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187460 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + libglusterfs/src/event-epoll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c +index 65f5efd..5afb2f2 100644 +--- a/libglusterfs/src/event-epoll.c ++++ b/libglusterfs/src/event-epoll.c +@@ -92,7 +92,7 @@ retry: + while (i < EVENT_EPOLL_TABLES) { + switch (event_pool->slots_used[i]) { + case EVENT_EPOLL_SLOTS: +- continue; ++ break; + case 0: + if (!event_pool->ereg[i]) { + table = __event_newtable(event_pool, i); +-- +1.8.3.1 + diff --git a/SOURCES/0341-socket-fix-error-handling.patch b/SOURCES/0341-socket-fix-error-handling.patch new file mode 100644 index 0000000..0eb68d1 --- /dev/null +++ b/SOURCES/0341-socket-fix-error-handling.patch @@ -0,0 +1,742 @@ +From 2c99b7db00a6238fd43053dd672c8ce519d8fd27 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Wed, 11 Dec 2019 18:21:14 +0100 +Subject: [PATCH 341/344] socket: fix error handling + +When __socket_proto_state_machine() detected a problem in the size of +the request or it couldn't allocate an iobuf of the requested size, it +returned -ENOMEM (-12). However the caller was expecting only -1 in +case of error. For this reason the error passes undetected initially, +adding back the socket to the epoll object. On further processing, +however, the error is finally detected and the connection terminated. +Meanwhile, another thread could receive a poll_in event from the same +connection, which could cause races with the connection destruction. +When this happened, the process crashed. + +To fix this, all error detection conditions have been hardened to be +more strict on what is valid and what not. Also, we don't return +-ENOMEM anymore. We always return -1 in case of error. + +An additional change has been done to prevent destruction of the +transport object while it may still be needed. 
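+
+A minimal standalone sketch (assumed demo code, not taken from socket.c)
+of why the strict checks matter: a callee that fails with -ENOMEM returns
+a negative value that is not -1, so only a 'ret < 0' comparison detects
+it reliably.
+
+    #include <errno.h>
+    #include <stdio.h>
+
+    /* Stand-in for a reader that fails with -errno instead of -1. */
+    static int
+    demo_read(void)
+    {
+        return -ENOMEM; /* -12: negative, but not -1 */
+    }
+
+    int
+    main(void)
+    {
+        int ret = demo_read();
+
+        if (ret == -1) /* old check: never true here, error is missed */
+            printf("old check caught the error\n");
+
+        if (ret < 0) /* hardened check used throughout this patch */
+            printf("new check caught the error (ret=%d)\n", ret);
+
+        return 0;
+    }
+
+The success-path comparisons move in the same direction, e.g. 'ret != -1'
+becomes 'ret >= 0', so valid positive return values keep passing while
+every negative error value is rejected.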
+ +Upstream patch: +> Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23861 +> Fixes: bz#1782495 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: I6e59cd81cbf670f7adfdde942625d4e6c3fbc82d +BUG: 1779696 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187689 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 173 ++++++++++++++++++---------------- + 1 file changed, 90 insertions(+), 83 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index bf2fa71..f54ca83 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -173,7 +173,7 @@ ssl_setup_connection_params(rpc_transport_t *this); + \ + ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \ + &in->pending_count, &bytes_read); \ +- if (ret == -1) \ ++ if (ret < 0) \ + break; \ + __socket_proto_update_priv_after_read(priv, ret, bytes_read); \ + } +@@ -739,7 +739,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + ret = sys_writev(sock, opvector, IOV_MIN(opcount)); + } + +- if (ret == 0 || (ret == -1 && errno == EAGAIN)) { ++ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) { + /* done for now */ + break; + } else if (ret > 0) +@@ -754,7 +754,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + errno = ENODATA; + ret = -1; + } +- if (ret == -1 && errno == EAGAIN) { ++ if ((ret < 0) && (errno == EAGAIN)) { + /* done for now */ + break; + } else if (ret > 0) +@@ -770,7 +770,7 @@ __socket_rwv(rpc_transport_t *this, struct iovec *vector, int count, + errno = ENOTCONN; + break; + } +- if (ret == -1) { ++ if (ret < 0) { + if (errno == EINTR) + continue; + +@@ -907,7 +907,7 @@ __socket_disconnect(rpc_transport_t *this) + gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this, + priv->sock); + +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_TRACE, + "tearing down socket connection"); + ret = __socket_teardown_connection(this); +@@ -942,7 +942,7 @@ __socket_server_bind(rpc_transport_t *this) + + ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno)); + } +@@ -955,7 +955,7 @@ __socket_server_bind(rpc_transport_t *this) + if (reuse_check_sock >= 0) { + ret = connect(reuse_check_sock, SA(&unix_addr), + this->myinfo.sockaddr_len); +- if ((ret == -1) && (ECONNREFUSED == errno)) { ++ if ((ret != 0) && (ECONNREFUSED == errno)) { + sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path); + } + gf_log(this->name, GF_LOG_INFO, +@@ -967,7 +967,7 @@ __socket_server_bind(rpc_transport_t *this) + ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr, + this->myinfo.sockaddr_len); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s", + this->myinfo.identifier, strerror(errno)); + if (errno == EADDRINUSE) { +@@ -976,7 +976,7 @@ __socket_server_bind(rpc_transport_t *this) + } + if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) { + if (getsockname(priv->sock, SA(&this->myinfo.sockaddr), +- &this->myinfo.sockaddr_len) == -1) { ++ &this->myinfo.sockaddr_len) != 0) { + 
gf_log(this->name, GF_LOG_WARNING, + "getsockname on (%d) failed (%s)", priv->sock, + strerror(errno)); +@@ -1004,7 +1004,7 @@ __socket_nonblock(int fd) + + flags = fcntl(fd, F_GETFL); + +- if (flags != -1) ++ if (flags >= 0) + ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + + return ret; +@@ -1034,7 +1034,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + #endif + + ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep alive option on socket %d", fd); + goto err; +@@ -1051,7 +1051,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl, + sizeof(keepaliveintvl)); + #endif +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep alive interval on socket %d", fd); + goto err; +@@ -1062,7 +1062,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle, + sizeof(keepaliveidle)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd, + strerror(errno)); +@@ -1070,7 +1070,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + } + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl, + sizeof(keepaliveintvl)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set keep interval %d on socket %d, %s", + keepaliveintvl, fd, strerror(errno)); +@@ -1082,7 +1082,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + goto done; + ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms, + sizeof(timeout_ms)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set " + "TCP_USER_TIMEOUT %d on socket %d, %s", +@@ -1093,7 +1093,7 @@ __socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle, + #if defined(TCP_KEEPCNT) + ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt, + sizeof(keepalivecnt)); +- if (ret == -1) { ++ if (ret != 0) { + gf_log("socket", GF_LOG_WARNING, + "failed to set " + "TCP_KEEPCNT %d on socket %d, %s", +@@ -1366,7 +1366,7 @@ socket_event_poll_err(rpc_transport_t *this, int gen, int idx) + + pthread_mutex_lock(&priv->out_lock); + { +- if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock != -1)) { ++ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) { + __socket_ioq_flush(this); + __socket_reset(this); + socket_closed = _gf_true; +@@ -1405,7 +1405,7 @@ socket_event_poll_out(rpc_transport_t *this) + if (priv->connected == 1) { + ret = __socket_ioq_churn(this); + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_TRACE, + "__socket_ioq_churn returned -1; " + "disconnecting socket"); +@@ -1463,7 +1463,7 @@ __socket_read_simple_msg(rpc_transport_t *this) + &bytes_read); + } + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "reading from socket failed. 
Error (%s), " + "peer (%s)", +@@ -1661,8 +1661,8 @@ __socket_read_vectored_request(rpc_transport_t *this, + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + request->vector_state = SP_STATE_VECTORED_REQUEST_INIT; + in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent - + (unsigned long) +@@ -1739,8 +1739,8 @@ __socket_read_request(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + request->header_state = SP_STATE_REQUEST_HEADER_INIT; + } + +@@ -1870,8 +1870,8 @@ __socket_read_accepted_successful_reply(rpc_transport_t *this) + /* now read the entire remaining msg into new iobuf */ + ret = __socket_read_simple_msg(this); + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + frag->call_body.reply.accepted_success_state = + SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT; + } +@@ -2003,8 +2003,8 @@ __socket_read_accepted_successful_reply_v2(rpc_transport_t *this) + /* now read the entire remaining msg into new iobuf */ + ret = __socket_read_simple_msg(this); + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- RPC_LASTFRAG(in->fraghdr))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ RPC_LASTFRAG(in->fraghdr))) { + frag->call_body.reply.accepted_success_state = + SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT; + } +@@ -2103,8 +2103,8 @@ __socket_read_accepted_reply(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + frag->call_body.reply + .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT; + } +@@ -2169,8 +2169,8 @@ __socket_read_vectored_reply(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + frag->call_body.reply + .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT; + in->payload_vector.iov_len = (unsigned long)frag->fragcurrent - +@@ -2237,7 +2237,7 @@ __socket_read_reply(rpc_transport_t *this) + /* Transition back to externally visible state. 
*/ + frag->state = SP_STATE_READ_MSGTYPE; + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "notify for event MAP_XID failed for %s", + this->peerinfo.identifier); +@@ -2315,8 +2315,8 @@ __socket_read_frag(rpc_transport_t *this) + + remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read; + +- if ((ret == -1) || ((ret == 0) && (remaining_size == 0) && +- (RPC_LASTFRAG(in->fraghdr)))) { ++ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) && ++ (RPC_LASTFRAG(in->fraghdr)))) { + /* frag->state = SP_STATE_NADA; */ + frag->state = SP_STATE_RPCFRAG_INIT; + } +@@ -2400,7 +2400,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + ret = __socket_readv(this, in->pending_vector, 1, + &in->pending_vector, &in->pending_count, + NULL); +- if (ret == -1) ++ if (ret < 0) + goto out; + + if (ret > 0) { +@@ -2422,7 +2422,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr); + + if (in->total_bytes_read >= GF_UNIT_GB) { +- ret = -ENOMEM; ++ ret = -1; + goto out; + } + +@@ -2430,7 +2430,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + this->ctx->iobuf_pool, + (in->total_bytes_read + sizeof(in->fraghdr))); + if (!iobuf) { +- ret = -ENOMEM; ++ ret = -1; + goto out; + } + +@@ -2457,7 +2457,7 @@ __socket_proto_state_machine(rpc_transport_t *this, + case SP_STATE_READING_FRAG: + ret = __socket_read_frag(this); + +- if ((ret == -1) || ++ if ((ret < 0) || + (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) { + goto out; + } +@@ -2575,7 +2575,7 @@ socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled) + pthread_mutex_unlock(&priv->notify.lock); + } + +- if (notify_handled && (ret != -1)) ++ if (notify_handled && (ret >= 0)) + event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen); + + if (pollin) { +@@ -2618,10 +2618,10 @@ socket_connect_finish(rpc_transport_t *this) + + ret = __socket_connect_finish(priv->sock); + +- if (ret == -1 && errno == EINPROGRESS) ++ if ((ret < 0) && (errno == EINPROGRESS)) + ret = 1; + +- if (ret == -1 && errno != EINPROGRESS) { ++ if ((ret < 0) && (errno != EINPROGRESS)) { + if (!priv->connect_finish_log) { + gf_log(this->name, GF_LOG_ERROR, + "connection to %s failed (%s); " +@@ -2640,7 +2640,7 @@ socket_connect_finish(rpc_transport_t *this) + + ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on (%d) failed (%s) - " + "disconnecting socket", +@@ -2924,6 +2924,13 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + return; + } + ++ /* At this point we are sure no other thread is using the transport because ++ * we cannot receive more events until we call gf_event_handled(). However ++ * this function may call gf_event_handled() in some cases. When this is ++ * done, the transport may be destroyed at any moment if another thread ++ * handled an error event. To prevent that we take a reference here. */ ++ rpc_transport_ref(this); ++ + GF_VALIDATE_OR_GOTO("socket", this, out); + GF_VALIDATE_OR_GOTO("socket", this->private, out); + GF_VALIDATE_OR_GOTO("socket", this->xl, out); +@@ -2960,7 +2967,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + if (ret > 0) { + gf_log(this->name, GF_LOG_TRACE, + "(sock:%d) returning to wait on socket", priv->sock); +- return; ++ goto out; + } + } else { + char *sock_type = (priv->is_server ? 
"Server" : "Client"); +@@ -3015,7 +3022,7 @@ socket_event_handler(int fd, int idx, int gen, void *data, int poll_in, + } + + out: +- return; ++ rpc_transport_unref(this); + } + + static void +@@ -3074,7 +3081,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + event_handled(ctx->event_pool, fd, idx, gen); + +- if (new_sock == -1) { ++ if (new_sock < 0) { + gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)", + priv->sock, strerror(errno)); + goto out; +@@ -3082,7 +3089,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) { + ret = __socket_nodelay(new_sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "setsockopt() failed for " + "NODELAY (%s)", +@@ -3094,7 +3101,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + ret = __socket_keepalive(new_sock, new_sockaddr.ss_family, + priv->keepaliveintvl, priv->keepaliveidle, + priv->keepalivecnt, priv->timeout); +- if (ret == -1) ++ if (ret != 0) + gf_log(this->name, GF_LOG_WARNING, + "Failed to set keep-alive: %s", strerror(errno)); + } +@@ -3110,7 +3117,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + } + + ret = pthread_mutex_init(&new_trans->lock, NULL); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "pthread_mutex_init() failed: %s; closing newly accepted " + "socket %d", +@@ -3130,7 +3137,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + + ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr), + &new_trans->myinfo.sockaddr_len); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_WARNING, + "getsockname on socket %d " + "failed (errno:%s); closing newly accepted socket", +@@ -3237,7 +3244,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + */ + ret = rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans); + +- if (ret != -1) { ++ if (ret >= 0) { + new_priv->idx = event_register( + ctx->event_pool, new_sock, socket_event_handler, new_trans, + 1, 0, new_trans->notify_poller_death); +@@ -3275,7 +3282,7 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + rpc_transport_unref(new_trans); + } + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket"); + sys_close(new_sock); + /* this unref is to actually cause the destruction of +@@ -3406,7 +3413,7 @@ socket_connect(rpc_transport_t *this, int port) + + pthread_mutex_lock(&priv->out_lock); + { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_TRACE, + "connect () called on transport " + "already connected"); +@@ -3420,7 +3427,7 @@ socket_connect(rpc_transport_t *this, int port) + + ret = socket_client_get_remote_sockaddr(this, &sock_union.sa, + &sockaddr_len, &sa_family); +- if (ret == -1) { ++ if (ret < 0) { + /* logged inside client_get_remote_sockaddr */ + goto unlock; + } +@@ -3439,7 +3446,7 @@ socket_connect(rpc_transport_t *this, int port) + this->peerinfo.sockaddr_len = sockaddr_len; + + priv->sock = sys_socket(sa_family, SOCK_STREAM, 0); +- if (priv->sock == -1) { ++ if (priv->sock < 0) { + gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)", + strerror(errno)); + ret = -1; +@@ -3451,7 +3458,7 @@ socket_connect(rpc_transport_t *this, int port) + */ + if (priv->windowsize != 0) { + if 
(setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting receive window " + "size failed: %d: %d: %s", +@@ -3459,7 +3466,7 @@ socket_connect(rpc_transport_t *this, int port) + } + + if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting send window size " + "failed: %d: %d: %s", +@@ -3484,7 +3491,7 @@ socket_connect(rpc_transport_t *this, int port) + if (priv->nodelay && (sa_family != AF_UNIX)) { + ret = __socket_nodelay(priv->sock); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "NODELAY on %d failed (%s)", + priv->sock, strerror(errno)); + } +@@ -3494,7 +3501,7 @@ socket_connect(rpc_transport_t *this, int port) + ret = __socket_keepalive(priv->sock, sa_family, + priv->keepaliveintvl, priv->keepaliveidle, + priv->keepalivecnt, priv->timeout); +- if (ret == -1) ++ if (ret != 0) + gf_log(this->name, GF_LOG_ERROR, "Failed to set keep-alive: %s", + strerror(errno)); + } +@@ -3516,7 +3523,7 @@ socket_connect(rpc_transport_t *this, int port) + + ret = client_bind(this, SA(&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len, priv->sock); +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s", + strerror(errno)); + goto handler; +@@ -3525,7 +3532,7 @@ socket_connect(rpc_transport_t *this, int port) + /* make socket non-blocking for all types of sockets */ + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)", + priv->sock, strerror(errno)); + goto handler; +@@ -3552,7 +3559,7 @@ socket_connect(rpc_transport_t *this, int port) + + connect_attempted = _gf_true; + +- if (ret == -1 && errno == ENOENT && ign_enoent) { ++ if ((ret != 0) && (errno == ENOENT) && ign_enoent) { + gf_log(this->name, GF_LOG_WARNING, + "Ignore failed connection attempt on %s, (%s) ", + this->peerinfo.identifier, strerror(errno)); +@@ -3570,7 +3577,7 @@ socket_connect(rpc_transport_t *this, int port) + goto handler; + } + +- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) { ++ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) { + /* For unix path based sockets, the socket path is + * cryptic (md5sum of path) and may not be useful for + * the user in debugging so log it in DEBUG +@@ -3634,8 +3641,8 @@ socket_connect(rpc_transport_t *this, int port) + pthread_mutex_unlock(&priv->out_lock); + + err: +- /* if sock != -1, then cleanup is done from the event handler */ +- if (ret == -1 && sock == -1) { ++ /* if sock >= 0, then cleanup is done from the event handler */ ++ if ((ret < 0) && (sock < 0)) { + /* Cleaup requires to send notification to upper layer which + intern holds the big_lock. There can be dead-lock situation + if big_lock is already held by the current thread. 
+@@ -3689,20 +3696,20 @@ socket_listen(rpc_transport_t *this) + } + pthread_mutex_unlock(&priv->out_lock); + +- if (sock != -1) { ++ if (sock >= 0) { + gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening"); + return ret; + } + + ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len, + &sa_family); +- if (ret == -1) { ++ if (ret < 0) { + return ret; + } + + pthread_mutex_lock(&priv->out_lock); + { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + gf_log(this->name, GF_LOG_DEBUG, "already listening"); + goto unlock; + } +@@ -3712,7 +3719,7 @@ socket_listen(rpc_transport_t *this) + + priv->sock = sys_socket(sa_family, SOCK_STREAM, 0); + +- if (priv->sock == -1) { ++ if (priv->sock < 0) { + gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)", + strerror(errno)); + goto unlock; +@@ -3723,7 +3730,7 @@ socket_listen(rpc_transport_t *this) + */ + if (priv->windowsize != 0) { + if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting receive window size " + "failed: %d: %d: %s", +@@ -3731,7 +3738,7 @@ socket_listen(rpc_transport_t *this) + } + + if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize, +- sizeof(priv->windowsize)) < 0) { ++ sizeof(priv->windowsize)) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setting send window size failed:" + " %d: %d: %s", +@@ -3741,7 +3748,7 @@ socket_listen(rpc_transport_t *this) + + if (priv->nodelay && (sa_family != AF_UNIX)) { + ret = __socket_nodelay(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "setsockopt() failed for NODELAY (%s)", strerror(errno)); + } +@@ -3750,7 +3757,7 @@ socket_listen(rpc_transport_t *this) + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "NBIO on socket %d failed " + "(errno:%s); closing socket", +@@ -3763,7 +3770,7 @@ socket_listen(rpc_transport_t *this) + + ret = __socket_server_bind(this); + +- if ((ret == -EADDRINUSE) || (ret == -1)) { ++ if (ret < 0) { + /* logged inside __socket_server_bind() */ + gf_log(this->name, GF_LOG_ERROR, + "__socket_server_bind failed;" +@@ -3779,7 +3786,7 @@ socket_listen(rpc_transport_t *this) + + ret = listen(priv->sock, priv->backlog); + +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "could not set socket %d to listen mode (errno:%s); " + "closing socket", +@@ -4025,7 +4032,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + priv = this->private; + + if (dict_get_str(options, "transport.socket.keepalive", &optstr) == 0) { +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); +@@ -4094,7 +4101,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + if (dict_get(options, "non-blocking-io")) { + optstr = data_to_str(dict_get(options, "non-blocking-io")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); +@@ -4109,7 +4116,7 @@ reconfigure(rpc_transport_t *this, dict_t *options) + + if (!priv->bio) { + ret = __socket_nonblock(priv->sock); +- if (ret == -1) { ++ if (ret != 0) { + gf_log(this->name, 
GF_LOG_WARNING, "NBIO on %d failed (%s)", + priv->sock, strerror(errno)); + goto out; +@@ -4508,7 +4515,7 @@ socket_init(rpc_transport_t *this) + if (dict_get(this->options, "non-blocking-io")) { + optstr = data_to_str(dict_get(this->options, "non-blocking-io")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); +@@ -4528,7 +4535,7 @@ socket_init(rpc_transport_t *this) + optstr = data_to_str( + dict_get(this->options, "transport.socket.nodelay")); + +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.nodelay' takes only " + "boolean options, not taking any action"); +@@ -4559,7 +4566,7 @@ socket_init(rpc_transport_t *this) + priv->keepalivecnt = GF_KEEPALIVE_COUNT; + if (dict_get_str(this->options, "transport.socket.keepalive", &optstr) == + 0) { +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); +@@ -4609,7 +4616,7 @@ socket_init(rpc_transport_t *this) + if (dict_get(this->options, "transport.socket.read-fail-log")) { + optstr = data_to_str( + dict_get(this->options, "transport.socket.read-fail-log")); +- if (gf_string2boolean(optstr, &tmp_bool) == -1) { ++ if (gf_string2boolean(optstr, &tmp_bool) != 0) { + gf_log(this->name, GF_LOG_WARNING, + "'transport.socket.read-fail-log' takes only " + "boolean options; logging socket read fails"); +@@ -4646,7 +4653,7 @@ fini(rpc_transport_t *this) + + priv = this->private; + if (priv) { +- if (priv->sock != -1) { ++ if (priv->sock >= 0) { + pthread_mutex_lock(&priv->out_lock); + { + __socket_ioq_flush(this); +@@ -4683,7 +4690,7 @@ init(rpc_transport_t *this) + + ret = socket_init(this); + +- if (ret == -1) { ++ if (ret < 0) { + gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed"); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch new file mode 100644 index 0000000..028a227 --- /dev/null +++ b/SOURCES/0342-Revert-hooks-remove-selinux-hooks.patch @@ -0,0 +1,120 @@ +From eb37a3b57415d2d4206ecdd2db10530366a0d1b1 Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Fri, 13 Dec 2019 15:20:27 +0530 +Subject: [PATCH 342/344] Revert "hooks: remove selinux hooks" + +This reverts commit 421743b7cfa6a249544f6abb4cca5a612bd20ea1. 
+ +Note:- We are not bringing back features.selinux but just the hooks for + setting SELinux context on bricks + +Label: DOWNSTREAM ONLY + +Change-Id: Iccc10428361cac59b294e1d7aa1ba8187c20029e +BUG: 1686800 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187691 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Niels de Vos <ndevos@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + configure.ac | 4 ++++ + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/create/Makefile.am | 1 + + extras/hook-scripts/create/post/Makefile.am | 6 ++++++ + extras/hook-scripts/delete/Makefile.am | 1 + + extras/hook-scripts/delete/pre/Makefile.am | 6 ++++++ + glusterfs.spec.in | 2 ++ + 7 files changed, 21 insertions(+), 1 deletion(-) + create mode 100644 extras/hook-scripts/create/Makefile.am + create mode 100644 extras/hook-scripts/create/post/Makefile.am + create mode 100644 extras/hook-scripts/delete/Makefile.am + create mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 327733e..98ee311 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -221,6 +221,10 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile ++ extras/hook-scripts/create/Makefile ++ extras/hook-scripts/create/post/Makefile ++ extras/hook-scripts/delete/Makefile ++ extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 771b37e..26059d7 100644 +--- a/extras/hook-scripts/Makefile.am ++++ b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick set start stop reset ++SUBDIRS = add-brick create delete set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +new file mode 100644 +index 0000000..b083a91 +--- /dev/null ++++ b/extras/hook-scripts/create/Makefile.am +@@ -0,0 +1 @@ ++SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +new file mode 100644 +index 0000000..919801a +--- /dev/null ++++ b/extras/hook-scripts/create/post/Makefile.am +@@ -0,0 +1,6 @@ ++EXTRA_DIST = S10selinux-label-brick.sh ++ ++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ ++if WITH_SERVER ++scripts_SCRIPTS = S10selinux-label-brick.sh ++endif +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +new file mode 100644 +index 0000000..c98a05d +--- /dev/null ++++ b/extras/hook-scripts/delete/Makefile.am +@@ -0,0 +1 @@ ++SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +new file mode 100644 +index 0000000..93a6b85 +--- /dev/null ++++ b/extras/hook-scripts/delete/pre/Makefile.am +@@ -0,0 +1,6 @@ ++EXTRA_DIST = S10selinux-del-fcontext.sh ++ ++scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ ++if WITH_SERVER ++scripts_SCRIPTS = S10selinux-del-fcontext.sh ++endif +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 1b975b2..012989a 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1453,6 +1453,7 @@ exit 0 + 
%attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1461,6 +1462,7 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre ++ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +-- +1.8.3.1 + diff --git a/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch new file mode 100644 index 0000000..77d2f64 --- /dev/null +++ b/SOURCES/0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch @@ -0,0 +1,155 @@ +From 8a8c508b529f7609fc5caa10bc79ba817f5d274a Mon Sep 17 00:00:00 2001 +From: Milan Zink <mzink@redhat.com> +Date: Mon, 5 Feb 2018 15:04:37 +0100 +Subject: [PATCH 343/344] extras/hooks: syntactical errors in SELinux hooks, + scipt logic improved + +Backport of https://review.gluster.org/c/glusterfs/+/19502 + +Change-Id: Ia5fa1df81bbaec3a84653d136a331c76b457f42c +BUG: 1686800 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/187692 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Niels de Vos <ndevos@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../create/post/S10selinux-label-brick.sh | 13 +++-- + .../delete/pre/S10selinux-del-fcontext.sh | 60 +++++++++++++--------- + tests/bugs/glusterfs-server/bug-877992.t | 4 +- + 3 files changed, 46 insertions(+), 31 deletions(-) + +diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +index de242d2..f9b4b1a 100755 +--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh ++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +@@ -34,18 +34,21 @@ parse_args () { + + set_brick_labels() + { +- volname=${1} ++ volname="${1}" + + # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" +- brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) + + for b in ${brickdirs}; do + # Add a file context for each brick path and associate with the + # glusterd_brick_t SELinux type. +- pattern="${b}\(/.*\)?" ++ pattern="${b}(/.*)?" + semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" +- + # Set the labels on the new brick path. 
+ restorecon -R "${b}" + done +diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +index 6eba66f..e7f4e8f 100755 +--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh ++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +@@ -15,45 +15,55 @@ OPTSPEC="volname:" + VOL= + + function parse_args () { +- ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") +- eval set -- "$ARGS" +- +- while true; do +- case $1 in +- --volname) +- shift +- VOL=$1 +- ;; +- *) +- shift +- break +- ;; +- esac ++ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") ++ eval set -- "${ARGS}" ++ ++ while true; do ++ case ${1} in ++ --volname) ++ shift ++ VOL=${1} ++ ;; ++ *) + shift +- done ++ break ++ ;; ++ esac ++ shift ++ done + } + + function delete_brick_fcontext() + { +- volname=$1 ++ volname="${1}" ++ ++ # grab the path for each local brick ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/" ++ brickdirs=$( ++ find "${brickpath}" -type f -exec grep '^path=' {} \; | \ ++ cut -d= -f 2 | \ ++ sort -u ++ ) ++ ++ for b in ${brickdirs} ++ do ++ # remove the file context associated with the brick path ++ pattern="${b}(/.*)?" ++ semanage fcontext --delete "${pattern}" + +- # grab the path for each local brick +- brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) ++ # remove the labels on brick path. ++ restorecon -R "${b}" ++ done + +- for b in $brickdirs +- do +- # remove the file context associated with the brick path +- semanage fcontext --delete $b\(/.*\)? +- done + } + + SELINUX_STATE=$(which getenforce && getenforce) + [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + + parse_args "$@" +-[ -z "$VOL" ] && exit 1 ++[ -z "${VOL}" ] && exit 1 + +-delete_brick_fcontext $VOL ++delete_brick_fcontext "${VOL}" + + # failure to delete the fcontext is not fatal + exit 0 +diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t +index aeb73ed..300000b 100755 +--- a/tests/bugs/glusterfs-server/bug-877992.t ++++ b/tests/bugs/glusterfs-server/bug-877992.t +@@ -46,7 +46,9 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1; + EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + EXPECT 'createPre' cat /tmp/pre.out; +-EXPECT 'createPost' cat /tmp/post.out; ++# Spost.sh comes after S10selinux-label-brick.sh under create post hook script ++# list. 
So consider the delay in setting SELinux context on bricks ++EXPECT_WITHIN 5 'createPost' cat /tmp/post.out; + hooks_cleanup 'create' + + +-- +1.8.3.1 + diff --git a/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch new file mode 100644 index 0000000..341aeae --- /dev/null +++ b/SOURCES/0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch @@ -0,0 +1,412 @@ +From 02a93265fe4e78e7fc3fa8c6caa773cbe02f50b6 Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Fri, 20 Dec 2019 16:01:59 +0530 +Subject: [PATCH 344/344] Revert all fixes to include SELinux hook scripts + +Following are the reverts included with this change: + +Revert "extras/hooks: syntactical errors in SELinux hooks, scipt logic improved" +Revert "Revert "hooks: remove selinux hooks"" +Revert "tests: subdir-mount.t is failing for brick_mux regrssion" +Revert "extras/hooks: Install and package newly added post add-brick hook script" +Revert "extras/hooks: Add SELinux label on new bricks during add-brick" + +Label: DOWNSTREAM ONLY + +See bug for more details. + +Change-Id: I5c9b9e0e6446568ce16af17257fa39338198a827 +BUG: 1686800 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/188169 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + configure.ac | 4 - + extras/hook-scripts/Makefile.am | 2 +- + extras/hook-scripts/add-brick/post/Makefile.am | 4 +- + .../add-brick/post/S10selinux-label-brick.sh | 100 --------------------- + extras/hook-scripts/create/Makefile.am | 1 - + extras/hook-scripts/create/post/Makefile.am | 6 -- + .../create/post/S10selinux-label-brick.sh | 13 ++- + extras/hook-scripts/delete/Makefile.am | 1 - + extras/hook-scripts/delete/pre/Makefile.am | 6 -- + .../delete/pre/S10selinux-del-fcontext.sh | 60 ++++++------- + glusterfs.spec.in | 3 - + tests/bugs/glusterfs-server/bug-877992.t | 4 +- + tests/features/subdir-mount.t | 11 +-- + 13 files changed, 37 insertions(+), 178 deletions(-) + delete mode 100755 extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh + delete mode 100644 extras/hook-scripts/create/Makefile.am + delete mode 100644 extras/hook-scripts/create/post/Makefile.am + delete mode 100644 extras/hook-scripts/delete/Makefile.am + delete mode 100644 extras/hook-scripts/delete/pre/Makefile.am + +diff --git a/configure.ac b/configure.ac +index 98ee311..327733e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -221,10 +221,6 @@ AC_CONFIG_FILES([Makefile + extras/hook-scripts/add-brick/Makefile + extras/hook-scripts/add-brick/pre/Makefile + extras/hook-scripts/add-brick/post/Makefile +- extras/hook-scripts/create/Makefile +- extras/hook-scripts/create/post/Makefile +- extras/hook-scripts/delete/Makefile +- extras/hook-scripts/delete/pre/Makefile + extras/hook-scripts/start/Makefile + extras/hook-scripts/start/post/Makefile + extras/hook-scripts/set/Makefile +diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am +index 26059d7..771b37e 100644 +--- a/extras/hook-scripts/Makefile.am ++++ b/extras/hook-scripts/Makefile.am +@@ -1,5 +1,5 @@ + EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh +-SUBDIRS = add-brick create delete set start stop reset ++SUBDIRS = add-brick set start stop reset + + scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/ + if USE_GEOREP +diff --git a/extras/hook-scripts/add-brick/post/Makefile.am 
b/extras/hook-scripts/add-brick/post/Makefile.am +index 9b236df..bfc0c1c 100644 +--- a/extras/hook-scripts/add-brick/post/Makefile.am ++++ b/extras/hook-scripts/add-brick/post/Makefile.am +@@ -1,6 +1,6 @@ +-EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh ++EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh + + hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ + if WITH_SERVER +-hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh ++hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh + endif +diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh +deleted file mode 100755 +index 4a17c99..0000000 +--- a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh ++++ /dev/null +@@ -1,100 +0,0 @@ +-#!/bin/bash +-# +-# Install to hooks/<HOOKS_VER>/add-brick/post +-# +-# Add an SELinux file context for each brick using the glusterd_brick_t type. +-# This ensures that the brick is relabeled correctly on an SELinux restart or +-# restore. Subsequently, run a restore on the brick path to set the selinux +-# labels. +-# +-### +- +-PROGNAME="Sselinux" +-OPTSPEC="volname:,version:,gd-workdir:,volume-op:" +-VOL= +- +-parse_args () { +- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") +- eval set -- "${ARGS}" +- +- while true; do +- case ${1} in +- --volname) +- shift +- VOL=${1} +- ;; +- --gd-workdir) +- shift +- GLUSTERD_WORKDIR=$1 +- ;; +- --version) +- shift +- ;; +- --volume-op) +- shift +- ;; +- *) +- shift +- break +- ;; +- esac +- shift +- done +-} +- +-set_brick_labels() +-{ +- local volname="${1}" +- local fctx +- local list=() +- +- fctx="$(semanage fcontext --list -C)" +- +- # wait for new brick path to be updated under +- # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ +- sleep 5 +- +- # grab the path for each local brick +- brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) +- +- # create a list of bricks for which custom SELinux +- # label doesn't exist +- for b in ${brickdirs}; do +- pattern="${b}(/.*)?" +- echo "${fctx}" | grep "^${pattern}\s" >/dev/null +- if [[ $? -ne 0 ]]; then +- list+=("${pattern}") +- fi +- done +- +- # Add a file context for each brick path in the list and associate with the +- # glusterd_brick_t SELinux type. +- for p in ${list[@]} +- do +- semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" +- done +- +- # Set the labels for which SELinux label was added above +- for b in ${brickdirs} +- do +- echo "${list[@]}" | grep "${b}" >/dev/null +- if [[ $? 
-eq 0 ]]; then +- restorecon -R "${b}" +- fi +- done +-} +- +-SELINUX_STATE=$(which getenforce && getenforce) +-[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 +- +-parse_args "$@" +-[ -z "${VOL}" ] && exit 1 +- +-set_brick_labels "${VOL}" +- +-exit 0 +diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am +deleted file mode 100644 +index b083a91..0000000 +--- a/extras/hook-scripts/create/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = post +diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am +deleted file mode 100644 +index 919801a..0000000 +--- a/extras/hook-scripts/create/post/Makefile.am ++++ /dev/null +@@ -1,6 +0,0 @@ +-EXTRA_DIST = S10selinux-label-brick.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/ +-if WITH_SERVER +-scripts_SCRIPTS = S10selinux-label-brick.sh +-endif +diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +index f9b4b1a..de242d2 100755 +--- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh ++++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh +@@ -34,21 +34,18 @@ parse_args () { + + set_brick_labels() + { +- volname="${1}" ++ volname=${1} + + # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) ++ brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" ++ brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) + + for b in ${brickdirs}; do + # Add a file context for each brick path and associate with the + # glusterd_brick_t SELinux type. +- pattern="${b}(/.*)?" ++ pattern="${b}\(/.*\)?" + semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" ++ + # Set the labels on the new brick path. 
+ restorecon -R "${b}" + done +diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am +deleted file mode 100644 +index c98a05d..0000000 +--- a/extras/hook-scripts/delete/Makefile.am ++++ /dev/null +@@ -1 +0,0 @@ +-SUBDIRS = pre +diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am +deleted file mode 100644 +index 93a6b85..0000000 +--- a/extras/hook-scripts/delete/pre/Makefile.am ++++ /dev/null +@@ -1,6 +0,0 @@ +-EXTRA_DIST = S10selinux-del-fcontext.sh +- +-scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/ +-if WITH_SERVER +-scripts_SCRIPTS = S10selinux-del-fcontext.sh +-endif +diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +index e7f4e8f..6eba66f 100755 +--- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh ++++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +@@ -15,55 +15,45 @@ OPTSPEC="volname:" + VOL= + + function parse_args () { +- ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") +- eval set -- "${ARGS}" +- +- while true; do +- case ${1} in +- --volname) +- shift +- VOL=${1} +- ;; +- *) ++ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") ++ eval set -- "$ARGS" ++ ++ while true; do ++ case $1 in ++ --volname) ++ shift ++ VOL=$1 ++ ;; ++ *) ++ shift ++ break ++ ;; ++ esac + shift +- break +- ;; +- esac +- shift +- done ++ done + } + + function delete_brick_fcontext() + { +- volname="${1}" +- +- # grab the path for each local brick +- brickpath="/var/lib/glusterd/vols/${volname}/bricks/" +- brickdirs=$( +- find "${brickpath}" -type f -exec grep '^path=' {} \; | \ +- cut -d= -f 2 | \ +- sort -u +- ) +- +- for b in ${brickdirs} +- do +- # remove the file context associated with the brick path +- pattern="${b}(/.*)?" +- semanage fcontext --delete "${pattern}" ++ volname=$1 + +- # remove the labels on brick path. +- restorecon -R "${b}" +- done ++ # grab the path for each local brick ++ brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) + ++ for b in $brickdirs ++ do ++ # remove the file context associated with the brick path ++ semanage fcontext --delete $b\(/.*\)? 
++ done + } + + SELINUX_STATE=$(which getenforce && getenforce) + [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + + parse_args "$@" +-[ -z "${VOL}" ] && exit 1 ++[ -z "$VOL" ] && exit 1 + +-delete_brick_fcontext "${VOL}" ++delete_brick_fcontext $VOL + + # failure to delete the fcontext is not fatal + exit 0 +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 012989a..671ee27 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1447,13 +1447,11 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +@@ -1462,7 +1460,6 @@ exit 0 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre +- %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post + %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t +index 300000b..aeb73ed 100755 +--- a/tests/bugs/glusterfs-server/bug-877992.t ++++ b/tests/bugs/glusterfs-server/bug-877992.t +@@ -46,9 +46,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}1; + EXPECT "$V0" volinfo_field $V0 'Volume Name'; + EXPECT 'Created' volinfo_field $V0 'Status'; + EXPECT 'createPre' cat /tmp/pre.out; +-# Spost.sh comes after S10selinux-label-brick.sh under create post hook script +-# list. 
So consider the delay in setting SELinux context on bricks +-EXPECT_WITHIN 5 'createPost' cat /tmp/post.out; ++EXPECT 'createPost' cat /tmp/post.out; + hooks_cleanup 'create' + + +diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t +index a02bd6b..8401946 100644 +--- a/tests/features/subdir-mount.t ++++ b/tests/features/subdir-mount.t +@@ -85,17 +85,12 @@ TEST $CLI volume start $V0 + TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2 + TEST stat $M2 + +-initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` + # mount shouldn't fail even after add-brick + TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6}; + +-# Wait to execute create-subdir-mounts.sh script by glusterd +-newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` +-while [ $newcnt -eq $initcnt ] +-do +- newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l` +- sleep 1 +-done ++# Give time for client process to get notified and use the new ++# volfile after add-brick ++sleep 1 + + # Existing mount should still be active + mount_inode=$(stat --format "%i" "$M2") +-- +1.8.3.1 + diff --git a/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch new file mode 100644 index 0000000..48b0cc8 --- /dev/null +++ b/SOURCES/0345-read-ahead-io-cache-turn-off-by-default.patch @@ -0,0 +1,82 @@ +From d45c64e17e1eb8003ac1086cbd3abea32414c7f9 Mon Sep 17 00:00:00 2001 +From: Raghavendra Gowdappa <rgowdapp@redhat.com> +Date: Tue, 12 Feb 2019 18:33:44 +0530 +Subject: [PATCH 345/346] read-ahead/io-cache: turn off by default + +We've found perf xlators io-cache and read-ahead not adding any +performance improvement. At best read-ahead is redundant due to kernel +read-ahead and at worst io-cache is degrading the performance for +workloads that doesn't involve re-read. Given that VFS already have +both these functionalities, this patch makes these two +translators turned off by default for native fuse mounts. + +For non-native fuse mounts like gfapi (NFS-ganesha/samba) we can have +these xlators on by having custom profiles. 
+ +>Change-Id: Ie7535788909d4c741844473696f001274dc0bb60 +>Signed-off-by: Raghavendra Gowdappa <rgowdapp@redhat.com> +>fixes: bz#1676479 +Upstream fix link: https://review.gluster.org/#/c/glusterfs/+/22203/ + +BUG: 1788656 +Change-Id: Ie7535788909d4c741844473696f001274dc0bb60 +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/188967 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tests/basic/ec/self-heal.t | 2 ++ + tests/basic/glusterd/volfile_server_switch.t | 2 +- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 ++-- + 3 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t +index d217559..6329bb6 100644 +--- a/tests/basic/ec/self-heal.t ++++ b/tests/basic/ec/self-heal.t +@@ -131,6 +131,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5} + TEST $CLI volume set $V0 client-log-level DEBUG + #Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp' + TEST $CLI volume set $V0 performance.write-behind off ++#md-cache can return stale stat due to default timeout being 1 sec ++TEST $CLI volume set $V0 performance.stat-prefetch off + EXPECT "Created" volinfo_field $V0 'Status' + TEST $CLI volume start $V0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status' +diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t +index 3090609..e11cfed 100644 +--- a/tests/basic/glusterd/volfile_server_switch.t ++++ b/tests/basic/glusterd/volfile_server_switch.t +@@ -34,7 +34,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --vol + + TEST kill_glusterd 1 + +-TEST $CLI_2 volume set $V0 performance.io-cache off ++TEST $CLI_2 volume set $V0 performance.write-behind off + + # make sure by this time directory will be created + # TODO: suggest ideal time to wait +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 16601a2..9001b88 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -2235,7 +2235,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.read-ahead", + .voltype = "performance/read-ahead", + .option = "!perf", +- .value = "on", ++ .value = "off", + .op_version = 1, + .description = "enable/disable read-ahead translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT}, +@@ -2249,7 +2249,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { + {.key = "performance.io-cache", + .voltype = "performance/io-cache", + .option = "!perf", +- .value = "on", ++ .value = "off", + .op_version = 1, + .description = "enable/disable io-cache translator in the volume.", + .flags = VOLOPT_FLAG_CLIENT_OPT}, +-- +1.8.3.1 + diff --git a/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch new file mode 100644 index 0000000..9fca79e --- /dev/null +++ b/SOURCES/0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch @@ -0,0 +1,223 @@ +From e2af9793014ad67859aa73088765a52307cbe466 Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Tue, 7 Jan 2020 19:43:05 +0100 +Subject: [PATCH 346/346] fuse: degrade logging of write failure to fuse device + +Problem: + +FUSE uses failures of communicating with /dev/fuse with various +errnos to 
indicate in-kernel conditions to userspace. Some of these +shouldn't be handled as an application error. Also the standard +POSIX errno description should not be shown as they are misleading +in this context. + +Solution: + +When writing to the fuse device, the caller of the respective +convenience routine can mask those errnos which don't qualify to +be an error for the application in that context, so then those +shall be reported at DEBUG level. + +The possible non-standard errnos are reported with their +POSIX name instead of their description to avoid confusion. +(Eg. for ENOENT we don't log "no such file or directory", +we log indeed literal "ENOENT".) + +Upstream on https://review.gluster.org/23974 +> Change-Id: I510158843e4b1d482bdc496c2e97b1860dc1ba93 +> updates: bz#1193929 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1763208 +Change-Id: Ib1676bb334ed153ce74ae1c0413fc0e58fb388c7 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/189056 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 78 +++++++++++++++++++++++++++++++++--- + xlators/mount/fuse/src/fuse-bridge.h | 9 ++++- + 2 files changed, 80 insertions(+), 7 deletions(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index ebe5c28..6e99053 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -198,7 +198,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir, + + static int + check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, +- ssize_t res) ++ ssize_t res, errnomask_t errnomask) + { + char w = 'W'; + struct iovec diov[4] = { +@@ -216,8 +216,59 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + struct fuse_out_header *fouh = NULL; + + if (res == -1) { +- gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR, +- "writing to fuse device failed: %s", strerror(errno)); ++ const char *errdesc = NULL; ++ gf_loglevel_t loglevel = GF_LOG_ERROR; ++ ++ /* If caller masked the errno, then it ++ * does not indicate an error at the application ++ * level, so we degrade the log severity to DEBUG. ++ */ ++ if (errnomask && errno < ERRNOMASK_MAX && ++ GET_ERRNO_MASK(errnomask, errno)) ++ loglevel = GF_LOG_DEBUG; ++ ++ switch (errno) { ++ /* The listed errnos are FUSE status indicators, ++ * not legit values according to POSIX (see write(3p)), ++ * so resolving them according to the standard ++ * POSIX interpretation would be misleading. ++ */ ++ case ENOENT: ++ errdesc = "ENOENT"; ++ break; ++ case ENOTDIR: ++ errdesc = "ENOTDIR"; ++ break; ++ case ENODEV: ++ errdesc = "ENODEV"; ++ break; ++ case EPERM: ++ errdesc = "EPERM"; ++ break; ++ case ENOMEM: ++ errdesc = "ENOMEM"; ++ break; ++ case ENOTCONN: ++ errdesc = "ENOTCONN"; ++ break; ++ case ECONNREFUSED: ++ errdesc = "ECONNREFUSED"; ++ break; ++ case EOVERFLOW: ++ errdesc = "EOVERFLOW"; ++ break; ++ case EBUSY: ++ errdesc = "EBUSY"; ++ break; ++ case ENOTEMPTY: ++ errdesc = "ENOTEMPTY"; ++ break; ++ default: ++ errdesc = strerror(errno); ++ } ++ ++ gf_log_callingfn("glusterfs-fuse", loglevel, ++ "writing to fuse device failed: %s", errdesc); + return errno; + } + +@@ -282,7 +333,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out, + gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res, + fouh->len, res == -1 ? 
strerror(errno) : ""); + +- return check_and_dump_fuse_W(priv, iov_out, count, res); ++ return check_and_dump_fuse_W(priv, iov_out, count, res, NULL); + } + + static int +@@ -353,6 +404,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; + ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOENT); ++ if (ENOTDIR < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOTDIR); ++ if (EBUSY < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, EBUSY); ++ if (ENOTEMPTY < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOTEMPTY); ++ + if (dentry->name) { + nlen = strlen(dentry->name); + fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; +@@ -437,6 +497,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) + fniio->off = 0; + fniio->len = -1; + ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(node->errnomask, ENOENT); ++ + fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, + uuid_utoa(inode->gfid)); + gf_log("glusterfs-fuse", GF_LOG_TRACE, +@@ -482,6 +545,7 @@ fuse_timed_message_new(void) + /* should be NULL if not set */ + dmsg->fuse_message_body = NULL; + INIT_LIST_HEAD(&dmsg->next); ++ memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask)); + + return dmsg; + } +@@ -680,6 +744,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg, + dmsg->fuse_out_header.unique = finh->unique; + dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header); + dmsg->fuse_out_header.error = -EAGAIN; ++ if (ENOENT < ERRNOMASK_MAX) ++ MASK_ERRNO(dmsg->errnomask, ENOENT); + timespec_now(&dmsg->scheduled_ts); + timespec_adjust_delta(&dmsg->scheduled_ts, + (struct timespec){0, 10000000}); +@@ -4848,7 +4914,7 @@ notify_kernel_loop(void *data) + iov_out.iov_base = node->inval_buf; + iov_out.iov_len = len; + rv = sys_writev(priv->fd, &iov_out, 1); +- check_and_dump_fuse_W(priv, &iov_out, 1, rv); ++ check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask); + + GF_FREE(node); + +@@ -4940,7 +5006,7 @@ timed_response_loop(void *data) + iovs[1] = (struct iovec){dmsg->fuse_message_body, + len - sizeof(struct fuse_out_header)}; + rv = sys_writev(priv->fd, iovs, 2); +- check_and_dump_fuse_W(priv, iovs, 2, rv); ++ check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask); + + fuse_timed_message_free(dmsg); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index cf4479c..d2d462c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -195,14 +195,20 @@ struct fuse_private { + }; + typedef struct fuse_private fuse_private_t; + ++typedef uint64_t errnomask_t[2]; ++#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63))) ++#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63))) ++#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t))) ++ + #define INVAL_BUF_SIZE \ + (sizeof(struct fuse_out_header) + \ + max(sizeof(struct fuse_notify_inval_inode_out), \ + sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1)) + + struct fuse_invalidate_node { +- char inval_buf[INVAL_BUF_SIZE]; ++ errnomask_t errnomask; + struct list_head next; ++ char inval_buf[INVAL_BUF_SIZE]; + }; + typedef struct fuse_invalidate_node fuse_invalidate_node_t; + +@@ -210,6 +216,7 @@ struct fuse_timed_message { + struct fuse_out_header fuse_out_header; + void *fuse_message_body; + struct timespec scheduled_ts; ++ errnomask_t errnomask; + struct list_head next; + }; + typedef struct fuse_timed_message 
fuse_timed_message_t; +-- +1.8.3.1 + diff --git a/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch new file mode 100644 index 0000000..ff5251d --- /dev/null +++ b/SOURCES/0347-tools-glusterfind-handle-offline-bricks.patch @@ -0,0 +1,236 @@ +From 87e6ea2cd63898c5d243b0f0c719f4f6347fb829 Mon Sep 17 00:00:00 2001 +From: Milind Changire <mchangir@redhat.com> +Date: Thu, 5 Jan 2017 19:53:19 +0530 +Subject: [PATCH 347/349] tools/glusterfind: handle offline bricks + +Problem: +glusterfind is unable to copy remote output file to local node when a +remove-brick is in progress on the remote node. After copying remote +files, in the --full output listing path, a "sort -u" command is run on +the collected files. However, "sort" exits with an error code if it +finds any file missing. + +Solution: +Maintain a map of (pid, output file) when the node commands are started +and remove the mapping for the pid for which the command returns an +error. Use the list of files present in the map for the "sort" command. + +Backport of: +> Patch: https://review.gluster.org/16332 +> Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b +> fixes: bz#1410439 +> Signed-off-by: Milind Changire <mchangir@redhat.com> +> Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +BUG: 1789447 +Change-Id: Ie6e019037379f4cb163f24b1c65eb382efc2fb3b +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/189214 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunny Kumar <sunkumar@redhat.com> +--- + tools/glusterfind/src/gfind_py2py3.py | 25 ++++++++++++++ + tools/glusterfind/src/main.py | 61 +++++++++++++++++++++-------------- + 2 files changed, 61 insertions(+), 25 deletions(-) + +diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py +index 1d41ec5..87324fb 100644 +--- a/tools/glusterfind/src/gfind_py2py3.py ++++ b/tools/glusterfind/src/gfind_py2py3.py +@@ -40,6 +40,19 @@ if sys.version_info >= (3,): + def gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile.encode()) + ++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): ++ f.write(u"{0}{1}{2}{3}{4}\n".format(row, ++ field_separator, ++ p_rep, ++ field_separator, ++ row_2_rep)) ++ ++ def gfind_write(f, row, field_separator, p_rep): ++ f.write(u"{0}{1}{2}\n".format(row, ++ field_separator, ++ p_rep)) ++ ++ + else: + + # Raw conversion of bytearray to string +@@ -61,3 +74,15 @@ else: + + def gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile) ++ ++ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): ++ f.write(u"{0}{1}{2}{3}{4}\n".format(row, ++ field_separator, ++ p_rep, ++ field_separator, ++ row_2_rep).encode()) ++ ++ def gfind_write(f, row, field_separator, p_rep): ++ f.write(u"{0}{1}{2}\n".format(row, ++ field_separator, ++ p_rep).encode()) +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index cc5a86f..fefe4a3 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -16,6 +16,7 @@ from multiprocessing import Process + import os + import xml.etree.cElementTree as etree + from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action ++from gfind_py2py3 import gfind_write_row, gfind_write + import logging + import shutil + import tempfile +@@ -35,9 +36,9 @@ GlusterFS Incremental API + ParseError = etree.ParseError if hasattr(etree, 
'ParseError') else SyntaxError + + logger = logging.getLogger() +-node_outfiles = [] + vol_statusStr = "" + gtmpfilename = None ++g_pid_nodefile_map = {} + + + class StoreAbsPath(Action): +@@ -111,7 +112,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts): + + + def run_cmd_nodes(task, args, **kwargs): +- global node_outfiles ++ global g_pid_nodefile_map + nodes = get_nodes(args.volume) + pool = [] + for num, node in enumerate(nodes): +@@ -142,7 +143,6 @@ def run_cmd_nodes(task, args, **kwargs): + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + +- node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + exit_on_err=True, logger=logger) +@@ -180,7 +180,6 @@ def run_cmd_nodes(task, args, **kwargs): + if tag == "": + tag = '""' if not is_host_local(host_uuid) else "" + +- node_outfiles.append(node_outfile) + # remote file will be copied into this directory + mkdirp(os.path.dirname(node_outfile), + exit_on_err=True, logger=logger) +@@ -264,6 +263,7 @@ def run_cmd_nodes(task, args, **kwargs): + args=(host, host_uuid, task, cmd, args, opts)) + p.start() + pool.append(p) ++ g_pid_nodefile_map[p.pid] = node_outfile + + for num, p in enumerate(pool): + p.join() +@@ -271,8 +271,11 @@ def run_cmd_nodes(task, args, **kwargs): + logger.warn("Command %s failed in %s" % (task, nodes[num][1])) + if task in ["create", "delete"]: + fail("Command %s failed in %s" % (task, nodes[num][1])) +- elif task == "pre" and args.disable_partial: +- sys.exit(1) ++ elif task == "pre" or task == "query": ++ if args.disable_partial: ++ sys.exit(1) ++ else: ++ del g_pid_nodefile_map[p.pid] + + + @cache_output +@@ -512,16 +515,10 @@ def write_output(outfile, outfilemerger, field_separator): + continue + + if row_2_rep and row_2_rep != "": +- f.write(u"{0}{1}{2}{3}{4}\n".format(row[0], +- field_separator, +- p_rep, +- field_separator, +- row_2_rep).encode()) +- else: +- f.write(u"{0}{1}{2}\n".format(row[0], +- field_separator, +- p_rep).encode()) ++ gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep) + ++ else: ++ gfind_write(f, row[0], field_separator, p_rep) + + def mode_create(session_dir, args): + logger.debug("Init is called - Session: %s, Volume: %s" +@@ -571,6 +568,7 @@ def mode_create(session_dir, args): + + def mode_query(session_dir, args): + global gtmpfilename ++ global g_pid_nodefile_map + + # Verify volume status + cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] +@@ -634,14 +632,20 @@ def mode_query(session_dir, args): + + # Merger + if args.full: +- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] +- execute(cmd, +- exit_msg="Failed to merge output files " +- "collected from nodes", logger=logger) ++ if len(g_pid_nodefile_map) > 0: ++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ ["-o", args.outfile] ++ execute(cmd, ++ exit_msg="Failed to merge output files " ++ "collected from nodes", logger=logger) ++ else: ++ fail("Failed to collect any output files from peers. 
" ++ "Looks like all bricks are offline.", logger=logger) + else: + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) +- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) ++ outfilemerger = OutputMerger(args.outfile + ".db", ++ g_pid_nodefile_map.values()) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +@@ -656,6 +660,7 @@ def mode_query(session_dir, args): + + def mode_pre(session_dir, args): + global gtmpfilename ++ global g_pid_nodefile_map + + """ + Read from Session file and write to session.pre file +@@ -696,14 +701,20 @@ def mode_pre(session_dir, args): + + # Merger + if args.full: +- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] +- execute(cmd, +- exit_msg="Failed to merge output files " +- "collected from nodes", logger=logger) ++ if len(g_pid_nodefile_map) > 0: ++ cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ ["-o", args.outfile] ++ execute(cmd, ++ exit_msg="Failed to merge output files " ++ "collected from nodes", logger=logger) ++ else: ++ fail("Failed to collect any output files from peers. " ++ "Looks like all bricks are offline.", logger=logger) + else: + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) +- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) ++ outfilemerger = OutputMerger(args.outfile + ".db", ++ g_pid_nodefile_map.values()) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +-- +1.8.3.1 + diff --git a/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch new file mode 100644 index 0000000..e1f89f9 --- /dev/null +++ b/SOURCES/0348-glusterfind-Fix-py2-py3-issues.patch @@ -0,0 +1,113 @@ +From 1ca8a545833e0a6e674984245338b8675ddc58bc Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Fri, 10 Jan 2020 16:48:14 +0530 +Subject: [PATCH 348/349] glusterfind: Fix py2/py3 issues + +1. In dictionary values(), returns list in py2 and not in py3. + So explicitly convert it into list. +2. xattr module returns values in bytes. 
So explicitly convert + them to str to work both with py2 and py3 + +Backport of: + > Patch: https://review.gluster.org/23993 + > fixes: bz#1789439 + > Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94 + > Signed-off-by: Kotresh HR <khiremat@redhat.com> + +BUG: 1789447 +Change-Id: I27a639cda4f7a4ece9744a97c3d16e247906bd94 +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/189215 +Reviewed-by: Shwetha Acharya <sacharya@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Hari Gowtham Gopal <hgowtham@redhat.com> +Reviewed-by: Sunny Kumar <sunkumar@redhat.com> +--- + tools/glusterfind/src/changelog.py | 14 +++++++++----- + tools/glusterfind/src/main.py | 8 ++++---- + 2 files changed, 13 insertions(+), 9 deletions(-) + +diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py +index d8f97e0..d972fb5 100644 +--- a/tools/glusterfind/src/changelog.py ++++ b/tools/glusterfind/src/changelog.py +@@ -14,6 +14,7 @@ import sys + import time + import xattr + import logging ++from gfind_py2py3 import bytearray_to_str + from argparse import ArgumentParser, RawDescriptionHelpFormatter + import hashlib + try: +@@ -105,9 +106,10 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): + changelog_data.inodegfid_add(os.stat(p).st_ino, gfid) + file_xattrs = xattr.list(p) + for x in file_xattrs: +- if x.startswith("trusted.pgfid."): ++ x_str = bytearray_to_str(x) ++ if x_str.startswith("trusted.pgfid."): + # PGFID in pgfid table +- changelog_data.pgfid_add(x.split(".")[-1]) ++ changelog_data.pgfid_add(x_str.split(".")[-1]) + except (IOError, OSError): + # All OS Errors ignored, since failures will be logged + # in End. All GFIDs present in gfidpath table +@@ -122,10 +124,12 @@ def enum_hard_links_using_gfid2path(brick, gfid, args): + try: + file_xattrs = xattr.list(p) + for x in file_xattrs: +- if x.startswith("trusted.gfid2path."): ++ x_str = bytearray_to_str(x) ++ if x_str.startswith("trusted.gfid2path."): + # get the value for the xattr i.e. 
<PGFID>/<BN> +- v = xattr.getxattr(p, x) +- pgfid, bn = v.split(os.sep) ++ v = xattr.getxattr(p, x_str) ++ v_str = bytearray_to_str(v) ++ pgfid, bn = v_str.split(os.sep) + try: + path = symlink_gfid_to_path(brick, pgfid) + fullpath = os.path.join(path, bn) +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index fefe4a3..dfc9d07 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -633,7 +633,7 @@ def mode_query(session_dir, args): + # Merger + if args.full: + if len(g_pid_nodefile_map) > 0: +- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " +@@ -645,7 +645,7 @@ def mode_query(session_dir, args): + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) + outfilemerger = OutputMerger(args.outfile + ".db", +- g_pid_nodefile_map.values()) ++ list(g_pid_nodefile_map.values())) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +@@ -702,7 +702,7 @@ def mode_pre(session_dir, args): + # Merger + if args.full: + if len(g_pid_nodefile_map) > 0: +- cmd = ["sort", "-u"] + g_pid_nodefile_map.values() + \ ++ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " +@@ -714,7 +714,7 @@ def mode_pre(session_dir, args): + # Read each Changelogs db and generate finaldb + create_file(args.outfile, exit_on_err=True, logger=logger) + outfilemerger = OutputMerger(args.outfile + ".db", +- g_pid_nodefile_map.values()) ++ list(g_pid_nodefile_map.values())) + write_output(args.outfile, outfilemerger, args.field_separator) + + try: +-- +1.8.3.1 + diff --git a/SOURCES/0349-glusterfind-python3-compatibility.patch b/SOURCES/0349-glusterfind-python3-compatibility.patch new file mode 100644 index 0000000..7f1c274 --- /dev/null +++ b/SOURCES/0349-glusterfind-python3-compatibility.patch @@ -0,0 +1,56 @@ +From 1354a492cbc758f9801568153380ca896fab7765 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Fri, 10 Jan 2020 14:28:35 +0000 +Subject: [PATCH 349/349] glusterfind: python3 compatibility + +Problem: +While we delete gluster volume the hook script 'S57glusterfind-delete-post.py' +is failed to execute and error message can be observed in glusterd log. + +Traceback: + File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 69, in <module> + main() + File "/var/lib/glusterd/hooks/1/delete/post/S57glusterfind-delete-post", line 39, in main + glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind") + File "/usr/lib64/python3.7/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.7/genericpath.py", line 155, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: + +Added the 'universal_newlines' flag to Popen to support backward compatibility. 
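
For context, a minimal standalone sketch of the failure and of why
universal_newlines fixes it (illustrative only, not part of the patch;
it assumes the gluster CLI is installed):

    import os
    from subprocess import PIPE, Popen

    # Python 3: without universal_newlines, stdout is a bytes stream, and
    # os.path.join() refuses to mix bytes with the str "glusterfind".
    p = Popen(["gluster", "system::", "getwd"], stdout=PIPE, stderr=PIPE)
    out, _ = p.communicate()              # e.g. b'/var/lib/glusterd\n'
    # os.path.join(out.strip(), "glusterfind")   # TypeError on Python 3

    # With universal_newlines=True the output is decoded to native str,
    # so the same join works on both Python 2 and Python 3.
    p = Popen(["gluster", "system::", "getwd"], stdout=PIPE, stderr=PIPE,
              universal_newlines=True)
    out, _ = p.communicate()              # e.g. '/var/lib/glusterd\n'
    workdir = os.path.join(out.strip(), "glusterfind")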
+ +Backport of: + > Patch: https://review.gluster.org/23994 + > Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f + > Fixes: bz#1789478 + > Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +Change-Id: Ie5655b11b55535c5ad2338108d0448e6fdaacf4f +BUG: 1789447 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/189216 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + tools/glusterfind/S57glusterfind-delete-post.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py +index 5b5142d..5beece2 100755 +--- a/tools/glusterfind/S57glusterfind-delete-post.py ++++ b/tools/glusterfind/S57glusterfind-delete-post.py +@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info): + + def get_glusterd_workdir(): + p = Popen(["gluster", "system::", "getwd"], +- stdout=PIPE, stderr=PIPE) ++ stdout=PIPE, stderr=PIPE, universal_newlines=True) + + out, _ = p.communicate() + +-- +1.8.3.1 + diff --git a/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch new file mode 100644 index 0000000..08f70a7 --- /dev/null +++ b/SOURCES/0350-tools-glusterfind-Remove-an-extra-argument.patch @@ -0,0 +1,37 @@ +From 6c06ac0571fb6bf0734b173cc3a75badc7554601 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Tue, 14 Jan 2020 10:51:06 +0530 +Subject: [PATCH 350/350] tools/glusterfind: Remove an extra argument + +Backport of: +> Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24011/ +> fixes: bz#1790748 +> Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1 +> Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +Change-Id: I1cb12c975142794139456d0f8e99fbdbb03c53a1 +BUG: 1789447 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/189363 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tools/glusterfind/src/main.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index dfc9d07..5ca1fec 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -515,7 +515,7 @@ def write_output(outfile, outfilemerger, field_separator): + continue + + if row_2_rep and row_2_rep != "": +- gfind_write_row(f, row[0], field_separator, p_rep, field_separator, row_2_rep) ++ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep) + + else: + gfind_write(f, row[0], field_separator, p_rep) +-- +1.8.3.1 + diff --git a/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch new file mode 100644 index 0000000..51dc3bb --- /dev/null +++ b/SOURCES/0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch @@ -0,0 +1,131 @@ +From f38f0988eb6c0d72677abceba5ebeb51ea8d44ad Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Tue, 21 Jan 2020 21:09:56 +0530 +Subject: [PATCH 351/351] server: Mount fails after reboot 1/3 gluster nodes + +Problem: At the time of coming up one server node(1x3) after reboot +client is unmounted.The client is unmounted because a client +is getting AUTH_FAILED event and client call fini for the graph.The +client is getting AUTH_FAILED because brick is not 
attached with a +graph at that moment + +Solution: To avoid the unmounting the client graph throw ENOENT error + from server in case if brick is not attached with server at + the time of authenticate clients. + +> Credits: Xavi Hernandez <xhernandez@redhat.com> +> Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e +> Fixes: bz#1793852 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry picked from commit e4f776308d5ee7ffeb07de0fd9e1edae6944030d) +> (Reviewd on upstream link https://review.gluster.org/#/c/glusterfs/+/24053/) + +Change-Id: Ie6fbd73cbcf23a35d8db8841b3b6036e87682f5e +BUG: 1793035 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/190042 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/protocol/bug-1433815-auth-allow.t | 1 + + xlators/protocol/client/src/client-handshake.c | 3 +- + xlators/protocol/server/src/server-handshake.c | 41 +++++++++++++++++--------- + 3 files changed, 29 insertions(+), 16 deletions(-) + +diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t +index fa22ad8..a78c0eb 100644 +--- a/tests/bugs/protocol/bug-1433815-auth-allow.t ++++ b/tests/bugs/protocol/bug-1433815-auth-allow.t +@@ -17,6 +17,7 @@ TEST $CLI volume create $V0 $H0:$B0/$V0 + # Set auth.allow so it *doesn't* include ourselves. + TEST $CLI volume set $V0 auth.allow 1.2.3.4 + TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count + + # "System getspec" will include the username and password if the request comes + # from a server (which we are). Unfortunately, this will cause authentication +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index c43756a..0002361 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -1031,8 +1031,7 @@ client_setvolume_cbk(struct rpc_req *req, struct iovec *iov, int count, + "SETVOLUME on remote-host failed: %s", remote_error); + + errno = op_errno; +- if (remote_error && +- (strcmp("Authentication failed", remote_error) == 0)) { ++ if (remote_error && (op_errno == EACCES)) { + auth_fail = _gf_true; + op_ret = 0; + } +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index 382f241..1d1177d 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -250,6 +250,7 @@ server_setvolume(rpcsvc_request_t *req) + char *subdir_mount = NULL; + char *client_name = NULL; + gf_boolean_t cleanup_starting = _gf_false; ++ gf_boolean_t xlator_in_graph = _gf_true; + + params = dict_new(); + reply = dict_new(); +@@ -311,8 +312,10 @@ server_setvolume(rpcsvc_request_t *req) + LOCK(&ctx->volfile_lock); + { + xl = get_xlator_by_name(this, name); +- if (!xl) ++ if (!xl) { ++ xlator_in_graph = _gf_false; + xl = this; ++ } + } + UNLOCK(&ctx->volfile_lock); + if (xl == NULL) { +@@ -568,20 +571,30 @@ server_setvolume(rpcsvc_request_t *req) + "failed to set error " + "msg"); + } else { +- gf_event(EVENT_CLIENT_AUTH_REJECT, +- "client_uid=%s;" +- "client_identifier=%s;server_identifier=%s;" +- "brick_path=%s", +- client->client_uid, req->trans->peerinfo.identifier, +- req->trans->myinfo.identifier, name); +- gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR, +- "Cannot authenticate client" +- " from %s 
%s", +- client->client_uid, (clnt_version) ? clnt_version : "old"); +- + op_ret = -1; +- op_errno = EACCES; +- ret = dict_set_str(reply, "ERROR", "Authentication failed"); ++ if (!xlator_in_graph) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOENT, PS_MSG_AUTHENTICATE_ERROR, ++ "Cannot authenticate client" ++ " from %s %s because brick is not attached in graph", ++ client->client_uid, (clnt_version) ? clnt_version : "old"); ++ ++ op_errno = ENOENT; ++ ret = dict_set_str(reply, "ERROR", "Brick not found"); ++ } else { ++ gf_event(EVENT_CLIENT_AUTH_REJECT, ++ "client_uid=%s;" ++ "client_identifier=%s;server_identifier=%s;" ++ "brick_path=%s", ++ client->client_uid, req->trans->peerinfo.identifier, ++ req->trans->myinfo.identifier, name); ++ gf_msg(this->name, GF_LOG_ERROR, EACCES, PS_MSG_AUTHENTICATE_ERROR, ++ "Cannot authenticate client" ++ " from %s %s", ++ client->client_uid, (clnt_version) ? clnt_version : "old"); ++ ++ op_errno = EACCES; ++ ret = dict_set_str(reply, "ERROR", "Authentication failed"); ++ } + if (ret < 0) + gf_msg_debug(this->name, 0, + "failed to set error " +-- +1.8.3.1 + diff --git a/SOURCES/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch b/SOURCES/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch new file mode 100644 index 0000000..1d9a389 --- /dev/null +++ b/SOURCES/0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch @@ -0,0 +1,38 @@ +From 8074906ace5fbd71b5d24cc3da5571ebdebed859 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 2 Jan 2020 11:27:47 +0000 +Subject: [PATCH 352/353] spec: fixed missing dependencies for + glusterfs-cloudsync-plugins + +RPMDiff raises a warning, subpackage glusterfs-cloudsync-plugins +on x86_64 consumes library libglusterfs.so.0()(64bit) from +subpackage glusterfs-libs but does not have explicit package +version requirement, which is fixed using this patch. + +Label: DOWNSTREAM ONLY + +BUG: 1775564 + +Change-Id: I05ea46ac2c92090f01c07dfbd6e0d66498f1c586 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/188619 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 671ee27..e95e539 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -374,6 +374,7 @@ This package provides the GlusterFS CLI application and its man page + %package cloudsync-plugins + Summary: Cloudsync Plugins + BuildRequires: libcurl-devel ++Requires: glusterfs-libs = %{version}-%{release} + + %description cloudsync-plugins + GlusterFS is a distributed file-system capable of scaling to several +-- +1.8.3.1 + diff --git a/SOURCES/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch b/SOURCES/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch new file mode 100644 index 0000000..e436373 --- /dev/null +++ b/SOURCES/0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch @@ -0,0 +1,47 @@ +From 37e2d76579abf38031d1cd9769da798fa04b183a Mon Sep 17 00:00:00 2001 +From: "Kaleb S. 
KEITHLEY" <kkeithle@redhat.com> +Date: Wed, 22 Jan 2020 14:14:33 -0500 +Subject: [PATCH 353/353] build: glusterfs-ganesha pkg requires + python3-policycoreutils on rhel8 + +glusterfs-ganesha pkg requires policycoreutils-python-utils on rhel8, +not policycoreutils-python + +also requires nfs-ganesha-selinux on rhel-8 (optional on rhel-7) + +Label: DOWNSTREAM ONLY + +Change-Id: Ia97b4dabdc098fb76e3f60e8b48ea4191e677136 +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +BUG: 1794153 +Reviewed-on: https://code.engineering.redhat.com/gerrit/190130 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index e95e539..7c8a751 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -462,6 +462,7 @@ Summary: NFS-Ganesha configuration + Group: Applications/File + + Requires: %{name}-server%{?_isa} = %{version}-%{release} ++Requires: nfs-ganesha-selinux >= 2.7.3 + Requires: nfs-ganesha-gluster >= 2.7.3 + Requires: pcs, dbus + %if ( 0%{?rhel} && 0%{?rhel} == 6 ) +@@ -475,7 +476,7 @@ Requires: net-tools + %endif + + %if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +-%if ( 0%{?rhel} ) ++%if ( 0%{?rhel} && 0%{?rhel} < 8 ) + Requires: selinux-policy >= 3.13.1-160 + Requires(post): policycoreutils-python + Requires(postun): policycoreutils-python +-- +1.8.3.1 + diff --git a/SOURCES/0354-core-fix-memory-pool-management-races.patch b/SOURCES/0354-core-fix-memory-pool-management-races.patch new file mode 100644 index 0000000..a7cdfc0 --- /dev/null +++ b/SOURCES/0354-core-fix-memory-pool-management-races.patch @@ -0,0 +1,466 @@ +From 75a9d946d252ce70460144615ca17dbdf2e80fab Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 7 Feb 2020 10:19:57 +0100 +Subject: [PATCH 354/355] core: fix memory pool management races + +Objects allocated from a per-thread memory pool keep a reference to it +to be able to return the object to the pool when not used anymore. The +object holding this reference can have a long life cycle that could +survive a glfs_fini() call. + +This means that it's unsafe to destroy memory pools from glfs_fini(). + +Another side effect of destroying memory pools from glfs_fini() is that +the TLS variable that points to one of those pools cannot be reset for +all alive threads. This means that any attempt to allocate memory from +those threads will access already free'd memory, which is very +dangerous. + +To fix these issues, mem_pools_fini() doesn't destroy pool lists +anymore. Only at process termination the pools are destroyed. 
+ +Upatream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24099 +> Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +> Fixes: bz#1801684 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +BUG: 1800703 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/192262 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/globals.c | 13 ++- + libglusterfs/src/glusterfs/globals.h | 3 + + libglusterfs/src/glusterfs/mem-pool.h | 28 ++--- + libglusterfs/src/mem-pool.c | 201 ++++++++++++++++++---------------- + libglusterfs/src/syncop.c | 7 ++ + 5 files changed, 146 insertions(+), 106 deletions(-) + +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index 02098e6..e433ee8 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr) + GF_FREE(thread_syncopctx.groups); + } + +- mem_pool_thread_destructor(); ++ mem_pool_thread_destructor(NULL); ++} ++ ++void ++gf_thread_needs_cleanup(void) ++{ ++ /* The value stored in free_key TLS is not really used for anything, but ++ * pthread implementation doesn't call the TLS destruction function unless ++ * it's != NULL. This function must be called whenever something is ++ * allocated for this thread so that glusterfs_cleanup() will be called ++ * and resources can be released. */ ++ (void)pthread_setspecific(free_key, (void *)1); + } + + static void +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index e218285..31717ed 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -181,6 +181,9 @@ glusterfs_leaseid_exist(void); + int + glusterfs_globals_init(glusterfs_ctx_t *ctx); + ++void ++gf_thread_needs_cleanup(void); ++ + struct tvec_base * + glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx); + void +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index be0a26d..97bf76c 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -245,24 +245,26 @@ typedef struct per_thread_pool { + } per_thread_pool_t; + + typedef struct per_thread_pool_list { +- /* +- * These first two members are protected by the global pool lock. When +- * a thread first tries to use any pool, we create one of these. We +- * link it into the global list using thr_list so the pool-sweeper +- * thread can find it, and use pthread_setspecific so this thread can +- * find it. When the per-thread destructor runs, we "poison" the pool +- * list to prevent further allocations. This also signals to the +- * pool-sweeper thread that the list should be detached and freed after +- * the next time it's swept. +- */ ++ /* thr_list is used to place the TLS pool_list into the active global list ++ * (pool_threads) or the inactive global list (pool_free_threads). It's ++ * protected by the global pool_lock. */ + struct list_head thr_list; +- unsigned int poison; ++ ++ /* This lock is used to update poison and the hot/cold lists of members ++ * of 'pools' array. */ ++ pthread_spinlock_t lock; ++ ++ /* This field is used to mark a pool_list as not being owned by any thread. ++ * This means that the sweeper thread won't be cleaning objects stored in ++ * its pools. 
mem_put() uses it to decide if the object being released is ++ * placed into its original pool_list or directly destroyed. */ ++ bool poison; ++ + /* + * There's really more than one pool, but the actual number is hidden + * in the implementation code so we just make it a single-element array + * here. + */ +- pthread_spinlock_t lock; + per_thread_pool_t pools[1]; + } per_thread_pool_list_t; + +@@ -307,7 +309,7 @@ void + mem_pool_destroy(struct mem_pool *pool); + + void +-mem_pool_thread_destructor(void); ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + + void + gf_mem_acct_enable_set(void *ctx); +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index d88041d..2b41c01 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL; + #define POOL_SWEEP_SECS 30 + + typedef struct { +- struct list_head death_row; + pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; + unsigned int n_cold_lists; + } sweep_state_t; +@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; + static unsigned int init_count = 0; + static pthread_t sweeper_tid; + +-gf_boolean_t ++static bool + collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list) + { + unsigned int i; + per_thread_pool_t *pt_pool; +- gf_boolean_t poisoned; + + (void)pthread_spin_lock(&pool_list->lock); + +- poisoned = pool_list->poison != 0; +- if (!poisoned) { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- if (pt_pool->cold_list) { +- if (state->n_cold_lists >= N_COLD_LISTS) { +- break; +- } +- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; ++ for (i = 0; i < NPOOLS; ++i) { ++ pt_pool = &pool_list->pools[i]; ++ if (pt_pool->cold_list) { ++ if (state->n_cold_lists >= N_COLD_LISTS) { ++ (void)pthread_spin_unlock(&pool_list->lock); ++ return true; + } +- pt_pool->cold_list = pt_pool->hot_list; +- pt_pool->hot_list = NULL; ++ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; + } ++ pt_pool->cold_list = pt_pool->hot_list; ++ pt_pool->hot_list = NULL; + } + + (void)pthread_spin_unlock(&pool_list->lock); + +- return poisoned; ++ return false; + } + +-void ++static void + free_obj_list(pooled_obj_hdr_t *victim) + { + pooled_obj_hdr_t *next; +@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim) + } + } + +-void * ++static void * + pool_sweeper(void *arg) + { + sweep_state_t state; + per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- per_thread_pool_t *pt_pool; +- unsigned int i; +- gf_boolean_t poisoned; ++ uint32_t i; ++ bool pending; + + /* + * This is all a bit inelegant, but the point is to avoid doing + * expensive things (like freeing thousands of objects) while holding a +- * global lock. Thus, we split each iteration into three passes, with ++ * global lock. Thus, we split each iteration into two passes, with + * only the first and fastest holding the lock. + */ + ++ pending = true; ++ + for (;;) { +- sleep(POOL_SWEEP_SECS); ++ /* If we know there's pending work to do (or it's the first run), we ++ * do collect garbage more often. */ ++ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS); ++ + (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- INIT_LIST_HEAD(&state.death_row); + state.n_cold_lists = 0; ++ pending = false; + + /* First pass: collect stuff that needs our attention. 
*/ + (void)pthread_mutex_lock(&pool_lock); +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) ++ list_for_each_entry(pool_list, &pool_threads, thr_list) + { +- (void)pthread_mutex_unlock(&pool_lock); +- poisoned = collect_garbage(&state, pool_list); +- (void)pthread_mutex_lock(&pool_lock); +- +- if (poisoned) { +- list_move(&pool_list->thr_list, &state.death_row); ++ if (collect_garbage(&state, pool_list)) { ++ pending = true; + } + } + (void)pthread_mutex_unlock(&pool_lock); + +- /* Second pass: free dead pools. */ +- (void)pthread_mutex_lock(&pool_free_lock); +- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- free_obj_list(pt_pool->cold_list); +- free_obj_list(pt_pool->hot_list); +- pt_pool->hot_list = pt_pool->cold_list = NULL; +- } +- list_del(&pool_list->thr_list); +- list_add(&pool_list->thr_list, &pool_free_threads); +- } +- (void)pthread_mutex_unlock(&pool_free_lock); +- +- /* Third pass: free cold objects from live pools. */ ++ /* Second pass: free cold objects from live pools. */ + for (i = 0; i < state.n_cold_lists; ++i) { + free_obj_list(state.cold_lists[i]); + } + (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + } ++ ++ return NULL; + } + + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { +- per_thread_pool_list_t *pool_list = thread_pool_list; +- +- /* The pool-sweeper thread will take it from here. +- * +- * We can change 'poison' here without taking locks because the change +- * itself doesn't interact with other parts of the code and a simple write +- * is already atomic from the point of view of the processor. +- * +- * This change can modify what mem_put() does, but both possibilities are +- * fine until the sweeper thread kicks in. The real synchronization must be +- * between mem_put() and the sweeper thread. */ ++ per_thread_pool_t *pt_pool; ++ uint32_t i; ++ ++ if (pool_list == NULL) { ++ pool_list = thread_pool_list; ++ } ++ ++ /* The current thread is terminating. None of the allocated objects will ++ * be used again. We can directly destroy them here instead of delaying ++ * it until the next sweeper loop. */ + if (pool_list != NULL) { +- pool_list->poison = 1; ++ /* Remove pool_list from the global list to avoid that sweeper ++ * could touch it. */ ++ pthread_mutex_lock(&pool_lock); ++ list_del(&pool_list->thr_list); ++ pthread_mutex_unlock(&pool_lock); ++ ++ /* We need to protect hot/cold changes from potential mem_put() calls ++ * that reference this pool_list. Once poison is set to true, we are ++ * sure that no one else will touch hot/cold lists. The only possible ++ * race is when at the same moment a mem_put() is adding a new item ++ * to the hot list. We protect from that by taking pool_list->lock. ++ * After that we don't need the lock to destroy the hot/cold lists. 
*/ ++ pthread_spin_lock(&pool_list->lock); ++ pool_list->poison = true; ++ pthread_spin_unlock(&pool_list->lock); ++ ++ for (i = 0; i < NPOOLS; i++) { ++ pt_pool = &pool_list->pools[i]; ++ ++ free_obj_list(pt_pool->hot_list); ++ pt_pool->hot_list = NULL; ++ ++ free_obj_list(pt_pool->cold_list); ++ pt_pool->cold_list = NULL; ++ } ++ ++ pthread_mutex_lock(&pool_free_lock); ++ list_add(&pool_list->thr_list, &pool_free_threads); ++ pthread_mutex_unlock(&pool_free_lock); ++ + thread_pool_list = NULL; + } + } +@@ -528,6 +538,30 @@ mem_pools_preinit(void) + init_done = GF_MEMPOOL_INIT_EARLY; + } + ++static __attribute__((destructor)) void ++mem_pools_postfini(void) ++{ ++ per_thread_pool_list_t *pool_list, *next; ++ ++ /* This is part of a process shutdown (or dlclose()) which means that ++ * most probably all threads should be stopped. However this is not the ++ * case for gluster and there are even legitimate situations in which we ++ * could have some threads alive. What is sure is that none of those ++ * threads should be using anything from this library, so destroying ++ * everything here should be fine and safe. */ ++ ++ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) ++ { ++ mem_pool_thread_destructor(pool_list); ++ } ++ ++ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) ++ { ++ list_del(&pool_list->thr_list); ++ FREE(pool_list); ++ } ++} ++ + /* Call mem_pools_init() once threading has been configured completely. This + * prevent the pool_sweeper thread from getting killed once the main() thread + * exits during deamonizing. */ +@@ -560,10 +594,6 @@ mem_pools_fini(void) + */ + break; + case 1: { +- per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- unsigned int i; +- + /* if mem_pools_init() was not called, sweeper_tid will be invalid + * and the functions will error out. That is not critical. In all + * other cases, the sweeper_tid will be valid and the thread gets +@@ -571,32 +601,11 @@ mem_pools_fini(void) + (void)pthread_cancel(sweeper_tid); + (void)pthread_join(sweeper_tid, NULL); + +- /* At this point all threads should have already terminated, so +- * it should be safe to destroy all pending per_thread_pool_list_t +- * structures that are stored for each thread. */ +- mem_pool_thread_destructor(); +- +- /* free all objects from all pools */ +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, +- thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- free_obj_list(pool_list->pools[i].hot_list); +- free_obj_list(pool_list->pools[i].cold_list); +- pool_list->pools[i].hot_list = NULL; +- pool_list->pools[i].cold_list = NULL; +- } +- +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads, +- thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* There could be threads still running in some cases, so we can't ++ * destroy pool_lists in use. We can also not destroy unused ++ * pool_lists because some allocated objects may still be pointing ++ * to them. */ ++ mem_pool_thread_destructor(NULL); + + init_done = GF_MEMPOOL_INIT_DESTROY; + /* Fall through. 
*/ +@@ -617,7 +626,7 @@ mem_pools_fini(void) + { + } + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { + } + +@@ -738,13 +747,21 @@ mem_get_pool_list(void) + } + } + ++ /* There's no need to take pool_list->lock, because this is already an ++ * atomic operation and we don't need to synchronize it with any change ++ * in hot/cold lists. */ ++ pool_list->poison = false; ++ + (void)pthread_mutex_lock(&pool_lock); +- pool_list->poison = 0; + list_add(&pool_list->thr_list, &pool_threads); + (void)pthread_mutex_unlock(&pool_lock); + + thread_pool_list = pool_list; + ++ /* Ensure that all memory objects associated to the new pool_list are ++ * destroyed when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ + return pool_list; + } + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 2eb7b49..0de53c6 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups) + + /* set/reset the ngrps, this is where reset of groups is handled */ + opctx->ngrps = count; ++ ++ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) { ++ /* This is the first time we are storing groups into the TLS structure ++ * so we mark the current thread so that it will be properly cleaned ++ * up when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ } + opctx->valid |= SYNCOPCTX_GROUPS; + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0355-core-Prevent-crash-on-process-termination.patch b/SOURCES/0355-core-Prevent-crash-on-process-termination.patch new file mode 100644 index 0000000..fca3f2c --- /dev/null +++ b/SOURCES/0355-core-Prevent-crash-on-process-termination.patch @@ -0,0 +1,74 @@ +From 10f1730073b9fb02d2ed7f7de855afd6df0e5202 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Wed, 19 Feb 2020 12:24:15 +0100 +Subject: [PATCH 355/355] core: Prevent crash on process termination + +A previous patch (ce61da816a) has fixed a use-after-free issue, +but it doesn't work well when the final cleanup is done at process +termination because gluster doesn't stop other threads before +calling exit(). + +For this reason, the final cleanup is removed to avoid the crash, +at least until the termination sequence properly stops all gluster +threads before exiting the program. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24138 +> Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b +> Fixes: bz#1801684 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: Id7cfb4407fcf208e28f03a7c3cdc3ef9c1f3bf9b +BUG: 1800703 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/192344 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/mem-pool.c | 30 +++++++++++------------------- + 1 file changed, 11 insertions(+), 19 deletions(-) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 2b41c01..73503e0 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -541,25 +541,17 @@ mem_pools_preinit(void) + static __attribute__((destructor)) void + mem_pools_postfini(void) + { +- per_thread_pool_list_t *pool_list, *next; +- +- /* This is part of a process shutdown (or dlclose()) which means that +- * most probably all threads should be stopped. 
However this is not the +- * case for gluster and there are even legitimate situations in which we +- * could have some threads alive. What is sure is that none of those +- * threads should be using anything from this library, so destroying +- * everything here should be fine and safe. */ +- +- list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) +- { +- mem_pool_thread_destructor(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* TODO: This function should destroy all per thread memory pools that ++ * are still alive, but this is not possible right now because glibc ++ * starts calling destructors as soon as exit() is called, and ++ * gluster doesn't ensure that all threads have been stopped before ++ * calling exit(). Existing threads would crash when they try to use ++ * memory or they terminate if we destroy things here. ++ * ++ * When we propertly terminate all threads, we can add the needed ++ * code here. Till then we need to leave the memory allocated. Most ++ * probably this function will be executed on process termination, ++ * so the memory will be released anyway by the system. */ + } + + /* Call mem_pools_init() once threading has been configured completely. This +-- +1.8.3.1 + diff --git a/SOURCES/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch b/SOURCES/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch new file mode 100644 index 0000000..f2b6835 --- /dev/null +++ b/SOURCES/0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch @@ -0,0 +1,26 @@ +From 4099fb424482ede2fb6346c76c58523113f415df Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 12 Mar 2020 01:02:41 -0400 +Subject: [PATCH 356/357] Update rfc.sh to rhgs-3.5.1-rhel-8 + +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 918fb11..a408e45 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.1"; ++branch="rhgs-3.5.1-rhel-8"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch b/SOURCES/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch new file mode 100644 index 0000000..a67b89c --- /dev/null +++ b/SOURCES/0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch @@ -0,0 +1,268 @@ +From 2d5e678f8331d4d99ee4dff6e166cbf01c83ab36 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Wed, 12 Feb 2020 12:47:57 -0500 +Subject: [PATCH 357/357] ganesha-ha: updates for pcs-0.10.x (i.e. in Fedora-29 + and RHEL-8) + +pcs-0.10 has introduced changes options to pcs commands + +pcs-0.10.x is in Fedora-29 and later and RHEL-8. + +Also some minor cleanup. Namely use bash built-in [[...]] in a few +more places instead of test(1), i.e. [...], and use correct "==" for +comparison. + +master: https://review.gluster.org/24115 + +Change-Id: I3fb2fcd71406964c77fdc4f18580ca133f365fd6 +BUG: 1802727 +Signed-off-by: Kaleb S. 
KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/194467 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 84 ++++++++++++++++++++++++------------ + 1 file changed, 56 insertions(+), 28 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 32af1ca..0b0050a 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -28,7 +28,12 @@ HA_VOL_MNT="/var/run/gluster/shared_storage" + HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" + SERVICE_MAN="DISTRO_NOT_FOUND" + +-RHEL6_PCS_CNAME_OPTION="--name" ++# rhel, fedora id, version ++ID="" ++VERSION_ID="" ++ ++PCS9OR10_PCS_CNAME_OPTION="" ++PCS9OR10_PCS_CLONE_OPTION="clone" + SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" + + # UNBLOCK RA uses shared_storage which may become unavailable +@@ -101,9 +106,9 @@ determine_service_manager () { + then + SERVICE_MAN="/sbin/service" + fi +- if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ] ++ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]] + then +- echo "Service manager not recognized, exiting" ++ logger "Service manager not recognized, exiting" + exit 1 + fi + } +@@ -114,7 +119,7 @@ manage_service () + local new_node=${2} + local option= + +- if [ "${action}" == "start" ]; then ++ if [[ "${action}" == "start" ]]; then + option="yes" + else + option="no" +@@ -122,7 +127,7 @@ manage_service () + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + +- if [ "${SERVICE_MAN}" == "/bin/systemctl" ] ++ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]] + then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" +@@ -140,7 +145,7 @@ check_cluster_exists() + + if [ -e /var/run/corosync.pid ]; then + cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) +- if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then ++ if [[ "${cluster_name}X" == "${name}X" ]]; then + logger "$name already exists, exiting" + exit 0 + fi +@@ -155,7 +160,7 @@ determine_servers() + local tmp_ifs=${IFS} + local ha_servers="" + +- if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then ++ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then + ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') + IFS=$' ' + for server in ${ha_servers} ; do +@@ -193,15 +198,21 @@ setup_cluster() + + logger "setting up cluster ${name} with the following ${servers}" + +- pcs cluster auth ${servers} +- # pcs cluster setup --name ${name} ${servers} +- pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} ++ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} ++ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} + if [ $? -ne 0 ]; then +- logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --enable --transport udpu ${servers} failed" ++ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" + exit 1; + fi ++ ++ # pcs cluster auth ${servers} ++ pcs cluster auth ++ if [ $? 
-ne 0 ]; then ++ logger "pcs cluster auth failed" ++ fi ++ + pcs cluster start --all + if [ $? -ne 0 ]; then + logger "pcs cluster start failed" +@@ -217,7 +228,7 @@ setup_cluster() + done + + unclean=$(pcs status | grep -u "UNCLEAN") +- while [[ "${unclean}X" = "UNCLEANX" ]]; do ++ while [[ "${unclean}X" == "UNCLEANX" ]]; do + sleep 1 + unclean=$(pcs status | grep -u "UNCLEAN") + done +@@ -244,7 +255,7 @@ setup_finalize_ha() + local stopped="" + + stopped=$(pcs status | grep -u "Stopped") +- while [[ "${stopped}X" = "StoppedX" ]]; do ++ while [[ "${stopped}X" == "StoppedX" ]]; do + sleep 1 + stopped=$(pcs status | grep -u "Stopped") + done +@@ -265,7 +276,7 @@ refresh_config () + if [ -e ${SECRET_PEM} ]; then + while [[ ${3} ]]; do + current_host=`echo ${3} | cut -d "." -f 1` +- if [ ${short_host} != ${current_host} ]; then ++ if [[ ${short_host} != ${current_host} ]]; then + output=$(ssh -oPasswordAuthentication=no \ + -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ + "dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +@@ -398,7 +409,7 @@ wrap_create_virt_ip_constraints() + # the result is "node2 node3 node4"; for node2, "node3 node4 node1" + # and so on. + while [[ ${1} ]]; do +- if [ "${1}" = "${primary}" ]; then ++ if [[ ${1} == ${primary} ]]; then + shift + while [[ ${1} ]]; do + tail=${tail}" "${1} +@@ -429,15 +440,15 @@ setup_create_resources() + local cibfile=$(mktemp -u) + + # fixup /var/lib/nfs +- logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone" +- pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone ++ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}" ++ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then +- logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" ++ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + +- pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone ++ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then +- logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed" ++ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace +@@ -445,9 +456,9 @@ setup_create_resources() + # ganesha-active crm_attribute + sleep 5 + +- pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true ++ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true + if [ $? -ne 0 ]; then +- logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" ++ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 +@@ -616,7 +627,7 @@ addnode_recreate_resources() + --after ${add_node}-nfs_block + if [ $? 
-ne 0 ]; then + logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ +- ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" ++ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 +@@ -780,7 +791,7 @@ setup_state_volume() + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + for server in ${HA_SERVERS} ; do +- if [ ${server} != ${dirname} ]; then ++ if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + fi +@@ -794,7 +805,7 @@ setup_state_volume() + enable_pacemaker() + { + while [[ ${1} ]]; do +- if [ "${SERVICE_MAN}" == "/usr/bin/systemctl" ]; then ++ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ + ${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else +@@ -892,7 +903,7 @@ delnode_state_volume() + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do +- if [[ "${server}" != "${dirname}" ]]; then ++ if [[ ${server} != ${dirname} ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi +@@ -963,7 +974,7 @@ status() + + create_ganesha_conf_file() + { +- if [ $1 == "yes" ]; ++ if [[ "$1" == "yes" ]]; + then + if [ -e $GANESHA_CONF ]; + then +@@ -1012,6 +1023,13 @@ main() + semanage boolean -m gluster_use_execmem --on + fi + ++ local osid="" ++ ++ osid=$(grep ^ID= /etc/os-release) ++ eval $(echo ${osid} | grep -F ID=) ++ osid=$(grep ^VERSION_ID= /etc/os-release) ++ eval $(echo ${osid} | grep -F VERSION_ID=) ++ + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" +@@ -1032,7 +1050,17 @@ main() + + determine_servers "setup" + +- if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then ++ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x ++ # default is pcs-0.10.x options but check for ++ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly ++ if [[ ${ID} =~ {rhel,centos} ]]; then ++ if [[ ${VERSION_ID} == 7.* ]]; then ++ PCS9OR10_PCS_CNAME_OPTION="--name" ++ PCS9OR10_PCS_CLONE_OPTION="--clone" ++ fi ++ fi ++ ++ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then + + determine_service_manager + +-- +1.8.3.1 + diff --git a/SOURCES/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch b/SOURCES/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch new file mode 100644 index 0000000..d7138a6 --- /dev/null +++ b/SOURCES/0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch @@ -0,0 +1,51 @@ +From 0d8c6d78130d22c475010bcce8055073b19de82a Mon Sep 17 00:00:00 2001 +From: Xie Changlong <xiechanglong@cmss.chinamobile.com> +Date: Fri, 17 May 2019 18:33:11 +0800 +Subject: [PATCH 358/362] inode: fix wrong loop count in __inode_ctx_free + +Avoid serious memory leak + +Backport of : +>fixes: bz#1711240 +>Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22738/ +>Change-Id: Ic61a8fdd0e941e136c98376a87b5a77fa8c22316 +>Signed-off-by: Xie Changlong <xiechanglong@cmss.chinamobile.com> + +BUG: 1781543 +Change-Id: I601ebb6cd6744a61c64edd3d21d3b9a0edf1e95b +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/195611 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> 
+--- + libglusterfs/src/inode.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 5331e93..9dbb25b 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -402,14 +402,15 @@ __inode_ctx_free(inode_t *inode) + goto noctx; + } + +- for (index = 0; index < inode->table->xl->graph->xl_count; index++) { ++ for (index = 0; index < inode->table->ctxcount; index++) { + if (inode->_ctx[index].value1 || inode->_ctx[index].value2) { + xl = (xlator_t *)(long)inode->_ctx[index].xl_key; +- old_THIS = THIS; +- THIS = xl; +- if (!xl->call_cleanup && xl->cbks->forget) ++ if (xl && !xl->call_cleanup && xl->cbks->forget) { ++ old_THIS = THIS; ++ THIS = xl; + xl->cbks->forget(xl, inode); +- THIS = old_THIS; ++ THIS = old_THIS; ++ } + } + } + +-- +1.8.3.1 + diff --git a/SOURCES/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch b/SOURCES/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch new file mode 100644 index 0000000..bd730bc --- /dev/null +++ b/SOURCES/0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch @@ -0,0 +1,41 @@ +From c0efaa98d777e4520028bf55482846b3ef5fca3a Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Wed, 1 Apr 2020 12:14:31 +0530 +Subject: [PATCH 359/362] dht: gf_defrag_process_dir is called even if + gf_defrag_fix_layout has failed + +Currently even though gf_defrag_fix_layout fails with ENOENT or ESTALE, a +subsequent call is made to gf_defrag_process_dir leading to rebalance failure. + +upstream patch: https://review.gluster.org/#/c/glusterfs/+/24225 + +> fixes: #1102 +> Change-Id: Ib0c309fd78e89a000fed3feb4bbe2c5b48e61478 +> Signed-off-by: Susant Palai <spalai@redhat.com> + +BUG: 1812789 +Change-Id: Ib0c309fd78e89a000fed3feb4bbe2c5b48e61478 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/196249 +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 559f046..f4c62b8 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3939,6 +3939,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + defrag->total_failures++; + } + ret = 0; ++ goto out; + } else { + gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED, + "Setxattr failed for %s", loc->path); +-- +1.8.3.1 + diff --git a/SOURCES/0360-rpc-Make-ssl-log-more-useful.patch b/SOURCES/0360-rpc-Make-ssl-log-more-useful.patch new file mode 100644 index 0000000..05e903d --- /dev/null +++ b/SOURCES/0360-rpc-Make-ssl-log-more-useful.patch @@ -0,0 +1,117 @@ +From 2b859d1a5499a215c8c37472d4fc7d7e4d70dac6 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Tue, 31 Mar 2020 16:45:35 +0530 +Subject: [PATCH 360/362] rpc: Make ssl log more useful + +Currently, ssl_setup_connection_params throws 4 messages for every +rpc connection that irritates a user while reading the logs. 
The same +info we can print in a single log with peerinfo to make it more +useful.ssl_setup_connection_params try to load dh_param even user +has not configured it and if a dh_param file is not available it throws +a failure message.To avoid the message load dh_param only while the user +has configured it. + +> Change-Id: I9ddb57f86a3fa3e519180cb5d88828e59fe0e487 +> Fixes: #1141 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> Cherry pick from commit 80dd8cceab3b860bf1bc2945c8e2d8d0b3913e48 +> Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24270/ + +BUG: 1812824 +Change-Id: I9ddb57f86a3fa3e519180cb5d88828e59fe0e487 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/196371 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 46 ++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 20 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index f54ca83..65845ea 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4240,6 +4240,7 @@ ssl_setup_connection_params(rpc_transport_t *this) + char *cipher_list = DEFAULT_CIPHER_LIST; + char *dh_param = DEFAULT_DH_PARAM; + char *ec_curve = DEFAULT_EC_CURVE; ++ gf_boolean_t dh_flag = _gf_false; + + priv = this->private; + +@@ -4248,6 +4249,10 @@ ssl_setup_connection_params(rpc_transport_t *this) + return 0; + } + ++ if (!priv->ssl_enabled && !priv->mgmt_ssl) { ++ return 0; ++ } ++ + priv->ssl_own_cert = DEFAULT_CERT_PATH; + if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { +@@ -4294,27 +4299,25 @@ ssl_setup_connection_params(rpc_transport_t *this) + priv->crl_path = gf_strdup(optstr); + } + +- gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG, +- "SSL support on the I/O path is %s", +- priv->ssl_enabled ? "ENABLED" : "NOT enabled"); +- gf_log(this->name, priv->mgmt_ssl ? GF_LOG_INFO : GF_LOG_DEBUG, +- "SSL support for glusterd is %s", +- priv->mgmt_ssl ? "ENABLED" : "NOT enabled"); +- + if (!priv->mgmt_ssl) { +- if (!dict_get_int32(this->options, SSL_CERT_DEPTH_OPT, &cert_depth)) { +- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d", +- cert_depth); ++ if (!dict_get_int32_sizen(this->options, SSL_CERT_DEPTH_OPT, ++ &cert_depth)) { + } + } else { + cert_depth = this->ctx->ssl_cert_depth; +- gf_log(this->name, GF_LOG_INFO, "using certificate depth %d", +- cert_depth); + } +- if (!dict_get_str(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) { ++ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG, ++ "SSL support for MGMT is %s IO path is %s certificate depth is %d " ++ "for peer %s", ++ (priv->mgmt_ssl ? "ENABLED" : "NOT enabled"), ++ (priv->ssl_enabled ? 
"ENABLED" : "NOT enabled"), cert_depth, ++ this->peerinfo.identifier); ++ ++ if (!dict_get_str_sizen(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) { + gf_log(this->name, GF_LOG_INFO, "using cipher list %s", cipher_list); + } +- if (!dict_get_str(this->options, SSL_DH_PARAM_OPT, &dh_param)) { ++ if (!dict_get_str_sizen(this->options, SSL_DH_PARAM_OPT, &dh_param)) { ++ dh_flag = _gf_true; + gf_log(this->name, GF_LOG_INFO, "using DH parameters %s", dh_param); + } + if (!dict_get_str(this->options, SSL_EC_CURVE_OPT, &ec_curve)) { +@@ -4349,12 +4352,15 @@ ssl_setup_connection_params(rpc_transport_t *this) + #ifdef SSL_OP_NO_COMPRESSION + SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_COMPRESSION); + #endif +- +- if ((bio = BIO_new_file(dh_param, "r")) == NULL) { +- gf_log(this->name, GF_LOG_INFO, +- "failed to open %s, " +- "DH ciphers are disabled", +- dh_param); ++ /* Upload file to bio wrapper only if dh param is configured ++ */ ++ if (dh_flag) { ++ if ((bio = BIO_new_file(dh_param, "r")) == NULL) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to open %s, " ++ "DH ciphers are disabled", ++ dh_param); ++ } + } + + if (bio != NULL) { +-- +1.8.3.1 + diff --git a/SOURCES/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch b/SOURCES/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch new file mode 100644 index 0000000..62b2fe0 --- /dev/null +++ b/SOURCES/0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch @@ -0,0 +1,122 @@ +From 04b824ebfcf80c648d5855f10bc30fde45fd62eb Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Thu, 26 Mar 2020 10:46:16 +0000 +Subject: [PATCH 361/362] snap_scheduler: python3 compatibility and new test + case +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem: +"snap_scheduler.py init" command failing with the below traceback: + +[root@dhcp43-104 ~]# snap_scheduler.py init +Traceback (most recent call last): + File "/usr/sbin/snap_scheduler.py", line 941, in <module> + sys.exit(main(sys.argv[1:])) + File "/usr/sbin/snap_scheduler.py", line 851, in main + initLogger() + File "/usr/sbin/snap_scheduler.py", line 153, in initLogger + logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log") + File "/usr/lib64/python3.6/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.6/genericpath.py", line 151, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: + +Added the 'universal_newlines' flag to Popen to support backward compatibility. + +Added a basic test for snapshot scheduler. 
+ +Backport Of: + + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24257/ + >Change-Id: I78e8fabd866fd96638747ecd21d292f5ca074a4e + >Fixes: #1134 + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1817369 +Change-Id: I78e8fabd866fd96638747ecd21d292f5ca074a4e +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/196482 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/snap_scheduler/snap_scheduler.py | 2 +- + tests/basic/volume-snap-scheduler.t | 49 +++++++++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+), 1 deletion(-) + create mode 100644 tests/basic/volume-snap-scheduler.t + +diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py +index a66c5e3..5a29d41 100755 +--- a/extras/snap_scheduler/snap_scheduler.py ++++ b/extras/snap_scheduler/snap_scheduler.py +@@ -149,7 +149,7 @@ def initLogger(): + sh.setFormatter(formatter) + + process = subprocess.Popen(["gluster", "--print-logdir"], +- stdout=subprocess.PIPE) ++ stdout=subprocess.PIPE, universal_newlines=True) + logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log") + + fh = logging.FileHandler(logfile) +diff --git a/tests/basic/volume-snap-scheduler.t b/tests/basic/volume-snap-scheduler.t +new file mode 100644 +index 0000000..a638c5c +--- /dev/null ++++ b/tests/basic/volume-snap-scheduler.t +@@ -0,0 +1,49 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $V0 ++ ++## Create, start and mount meta_volume as ++## snap_scheduler expects shared storage to be enabled. ++## This test is very basic in nature not creating any snapshot ++## and purpose is to validate snap scheduling commands. ++ ++TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; ++TEST $CLI volume start $META_VOL ++TEST mkdir -p $META_MNT ++TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT ++ ++##function to check status ++function check_status_scheduler() ++{ ++ local key=$1 ++ snap_scheduler.py status | grep -F "$key" | wc -l ++} ++ ++##Basic snap_scheduler command test init/enable/disable/list ++ ++TEST snap_scheduler.py init ++ ++TEST snap_scheduler.py enable ++ ++EXPECT 1 check_status_scheduler "Enabled" ++ ++TEST snap_scheduler.py disable ++ ++EXPECT 1 check_status_scheduler "Disabled" ++ ++TEST snap_scheduler.py list ++ ++TEST $CLI volume stop $V0; ++ ++TEST $CLI volume delete $V0; ++ ++cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0362-write-behind-fix-data-corruption.patch b/SOURCES/0362-write-behind-fix-data-corruption.patch new file mode 100644 index 0000000..aeb7242 --- /dev/null +++ b/SOURCES/0362-write-behind-fix-data-corruption.patch @@ -0,0 +1,454 @@ +From 48f6929590157d9a1697e11c02441207afdc1bed Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 27 Mar 2020 23:56:15 +0100 +Subject: [PATCH 362/362] write-behind: fix data corruption + +There was a bug in write-behind that allowed a previous completed write +to overwrite the overlapping region of data from a future write. + +Suppose we want to send three writes (W1, W2 and W3). W1 and W2 are +sequential, and W3 writes at the same offset of W2: + + W2.offset = W3.offset = W1.offset + W1.size + +Both W1 and W2 are sent in parallel. 
W3 is only sent after W2 completes. +So W3 should *always* overwrite the overlapping part of W2. + +Suppose write-behind processes the requests from 2 concurrent threads: + + Thread 1 Thread 2 + + <received W1> + <received W2> + wb_enqueue_tempted(W1) + /* W1 is assigned gen X */ + wb_enqueue_tempted(W2) + /* W2 is assigned gen X */ + + wb_process_queue() + __wb_preprocess_winds() + /* W1 and W2 are sequential and all + * other requisites are met to merge + * both requests. */ + __wb_collapse_small_writes(W1, W2) + __wb_fulfill_request(W2) + + __wb_pick_unwinds() -> W2 + /* In this case, since the request is + * already fulfilled, wb_inode->gen + * is not updated. */ + + wb_do_unwinds() + STACK_UNWIND(W2) + + /* The application has received the + * result of W2, so it can send W3. */ + <received W3> + + wb_enqueue_tempted(W3) + /* W3 is assigned gen X */ + + wb_process_queue() + /* Here we have W1 (which contains + * the conflicting W2) and W3 with + * same gen, so they are interpreted + * as concurrent writes that do not + * conflict. */ + __wb_pick_winds() -> W3 + + wb_do_winds() + STACK_WIND(W3) + + wb_process_queue() + /* Eventually W1 will be + * ready to be sent */ + __wb_pick_winds() -> W1 + __wb_pick_unwinds() -> W1 + /* Here wb_inode->gen is + * incremented. */ + + wb_do_unwinds() + STACK_UNWIND(W1) + + wb_do_winds() + STACK_WIND(W1) + +So, as we can see, W3 is sent before W1, which shouldn't happen. + +The problem is that wb_inode->gen is only incremented for requests that +have not been fulfilled but, after a merge, the request is marked as +fulfilled even though it has not been sent to the brick. This allows +that future requests are assigned to the same generation, which could +be internally reordered. + +Solution: + +Increment wb_inode->gen before any unwind, even if it's for a fulfilled +request. + +Special thanks to Stefan Ring for writing a reproducer that has been +crucial to identify the issue. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24263 +> Change-Id: Id4ab0f294a09aca9a863ecaeef8856474662ab45 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +> Fixes: #884 + +Change-Id: Id4ab0f294a09aca9a863ecaeef8856474662ab45 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +BUG: 1819059 +Reviewed-on: https://code.engineering.redhat.com/gerrit/196250 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/write-behind/issue-884.c | 267 +++++++++++++++++++++ + tests/bugs/write-behind/issue-884.t | 40 +++ + .../performance/write-behind/src/write-behind.c | 4 +- + 3 files changed, 309 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/write-behind/issue-884.c + create mode 100755 tests/bugs/write-behind/issue-884.t + +diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c +new file mode 100644 +index 0000000..e9c33b3 +--- /dev/null ++++ b/tests/bugs/write-behind/issue-884.c +@@ -0,0 +1,267 @@ ++ ++#define _GNU_SOURCE ++ ++#include <stdlib.h> ++#include <stdio.h> ++#include <string.h> ++#include <time.h> ++#include <assert.h> ++#include <errno.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <pthread.h> ++ ++#include <glusterfs/api/glfs.h> ++ ++/* Based on a reproducer by Stefan Ring. It seems to be quite sensible to any ++ * timing modification, so the code has been maintained as is, only with minor ++ * changes. 
*/ ++ ++struct glfs *glfs; ++ ++pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER; ++pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER; ++ ++typedef struct _my_aiocb { ++ int64_t size; ++ volatile int64_t seq; ++ int which; ++} my_aiocb; ++ ++typedef struct _worker_data { ++ my_aiocb cb; ++ struct iovec iov; ++ int64_t offset; ++} worker_data; ++ ++typedef struct { ++ worker_data wdata[2]; ++ ++ volatile unsigned busy; ++} all_data_t; ++ ++all_data_t all_data; ++ ++static void ++completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre, ++ struct glfs_stat *post, void *arg) ++{ ++ void *the_thread; ++ my_aiocb *cb = (my_aiocb *)arg; ++ long seq = cb->seq; ++ ++ assert(ret == cb->size); ++ ++ pthread_mutex_lock(&the_mutex); ++ pthread_cond_broadcast(&the_cond); ++ ++ all_data.busy &= ~(1 << cb->which); ++ cb->seq = -1; ++ ++ the_thread = (void *)pthread_self(); ++ printf("worker %d is done from thread %p, seq %ld!\n", cb->which, ++ the_thread, seq); ++ ++ pthread_mutex_unlock(&the_mutex); ++} ++ ++static void ++init_wdata(worker_data *data, int which) ++{ ++ data->cb.which = which; ++ data->cb.seq = -1; ++ ++ data->iov.iov_base = malloc(1024 * 1024); ++ memset(data->iov.iov_base, 6, ++ 1024 * 1024); /* tail part never overwritten */ ++} ++ ++static void ++init() ++{ ++ all_data.busy = 0; ++ ++ init_wdata(&all_data.wdata[0], 0); ++ init_wdata(&all_data.wdata[1], 1); ++} ++ ++static void ++do_write(struct glfs_fd *fd, int content, int size, int64_t seq, ++ worker_data *wdata, const char *name) ++{ ++ int ret; ++ ++ wdata->cb.size = size; ++ wdata->cb.seq = seq; ++ ++ if (content >= 0) ++ memset(wdata->iov.iov_base, content, size); ++ wdata->iov.iov_len = size; ++ ++ pthread_mutex_lock(&the_mutex); ++ printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n", ++ wdata->cb.which, name, (long)wdata->offset, size, (long)seq); ++ pthread_mutex_unlock(&the_mutex); ++ ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0, ++ completion_fnc, &wdata->cb); ++ assert(ret >= 0); ++} ++ ++#define IDLE 0 // both workers must be idle ++#define ANY 1 // use any worker, other one may be busy ++ ++int ++get_worker(int waitfor, int64_t excl_seq) ++{ ++ int which; ++ ++ pthread_mutex_lock(&the_mutex); ++ ++ while (waitfor == IDLE && (all_data.busy & 3) != 0 || ++ waitfor == ANY && ++ ((all_data.busy & 3) == 3 || ++ excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq || ++ all_data.wdata[1].cb.seq == excl_seq))) ++ pthread_cond_wait(&the_cond, &the_mutex); ++ ++ if (!(all_data.busy & 1)) ++ which = 0; ++ else ++ which = 1; ++ ++ all_data.busy |= (1 << which); ++ ++ pthread_mutex_unlock(&the_mutex); ++ ++ return which; ++} ++ ++static int ++doit(struct glfs_fd *fd) ++{ ++ int ret; ++ int64_t seq = 0; ++ int64_t offset = 0; // position in file, in blocks ++ int64_t base = 0x1000; // where to place the data, in blocks ++ ++ int async_mode = ANY; ++ ++ init(); ++ ++ for (;;) { ++ int which; ++ worker_data *wdata; ++ ++ // for growing to the first offset ++ for (;;) { ++ int gap = base + 0x42 - offset; ++ if (!gap) ++ break; ++ if (gap > 80) ++ gap = 80; ++ ++ which = get_worker(IDLE, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = offset << 9; ++ do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling"); ++ ++ offset += gap; ++ } ++ ++ // 8700 ++ which = get_worker(IDLE, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0x42) << 9; ++ do_write(fd, 1, 62 << 9, seq++, wdata, "!8700"); ++ ++ // 8701 ++ which = get_worker(IDLE, -1); ++ wdata = 
&all_data.wdata[which]; ++ ++ wdata->offset = (base + 0x42) << 9; ++ do_write(fd, 2, 55 << 9, seq++, wdata, "!8701"); ++ ++ // 8702 ++ which = get_worker(async_mode, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0x79) << 9; ++ do_write(fd, 3, 54 << 9, seq++, wdata, "!8702"); ++ ++ // 8703 ++ which = get_worker(async_mode, -1); ++ wdata = &all_data.wdata[which]; ++ ++ wdata->offset = (base + 0xaf) << 9; ++ do_write(fd, 4, 81 << 9, seq++, wdata, "!8703"); ++ ++ // 8704 ++ // this writes both 5s and 6s ++ // the range of 5s is the one that overwrites 8703 ++ ++ which = get_worker(async_mode, seq - 1); ++ wdata = &all_data.wdata[which]; ++ ++ memset(wdata->iov.iov_base, 5, 81 << 9); ++ wdata->offset = (base + 0xaf) << 9; ++ do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704"); ++ ++ offset = base + 0x706; ++ base += 0x1000; ++ if (base >= 0x100000) ++ break; ++ } ++ ++ printf("done!\n"); ++ fflush(stdout); ++ ++ pthread_mutex_lock(&the_mutex); ++ ++ while ((all_data.busy & 3) != 0) ++ pthread_cond_wait(&the_cond, &the_mutex); ++ ++ pthread_mutex_unlock(&the_mutex); ++ ++ ret = glfs_close(fd); ++ assert(ret >= 0); ++ /* ++ ret = glfs_fini(glfs); ++ assert(ret >= 0); ++ */ ++ return 0; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret; ++ int open_flags = O_RDWR | O_DIRECT | O_TRUNC; ++ struct glfs_fd *fd; ++ ++ glfs = glfs_new(argv[1]); ++ if (!glfs) { ++ printf("glfs_new!\n"); ++ goto out; ++ } ++ ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007); ++ if (ret < 0) { ++ printf("set_volfile!\n"); ++ goto out; ++ } ++ ret = glfs_init(glfs); ++ if (ret) { ++ printf("init!\n"); ++ goto out; ++ } ++ fd = glfs_open(glfs, argv[2], open_flags); ++ if (!fd) { ++ printf("open!\n"); ++ goto out; ++ } ++ srand(time(NULL)); ++ return doit(fd); ++out: ++ return 1; ++} +diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t +new file mode 100755 +index 0000000..2bcf7d1 +--- /dev/null ++++ b/tests/bugs/write-behind/issue-884.t +@@ -0,0 +1,40 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++# This test tries to detect a race condition in write-behind. It's based on a ++# reproducer written by Stefan Ring that is able to hit it sometimes. On my ++# system, it happened around 10% of the runs. This means that if this bug ++# appears again, this test will fail once every 10 runs. Most probably this ++# failure will be hidden by the automatic test retry of the testing framework. ++# ++# Please, if this test fails, it needs to be analyzed in detail. 
++
++function run() {
++ "${@}" >/dev/null
++}
++
++cleanup
++
++TEST glusterd
++TEST pidof glusterd
++
++TEST $CLI volume create $V0 $H0:$B0/$V0
++# This makes it easier to hit the issue
++TEST $CLI volume set $V0 client-log-level TRACE
++TEST $CLI volume start $V0
++
++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
++
++build_tester $(dirname $0)/issue-884.c -lgfapi
++
++TEST touch $M0/testfile
++
++# This program generates a file of 535694336 bytes with a fixed pattern
++TEST run $(dirname $0)/issue-884 $V0 testfile
++
++# This is the md5sum of the expected pattern without corruption
++EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')"
++
++cleanup
+diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
+index 70e281a..90a0bcf 100644
+--- a/xlators/performance/write-behind/src/write-behind.c
++++ b/xlators/performance/write-behind/src/write-behind.c
+@@ -1284,14 +1284,14 @@ __wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies)
+
+ wb_inode->window_current += req->orig_size;
+
++ wb_inode->gen++;
++
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail(&req->lie, &wb_inode->liability);
+
+ req->ordering.lied = 1;
+
+- wb_inode->gen++;
+-
+ uuid_utoa_r(req->gfid, gfid);
+ gf_msg_debug(wb_inode->this->name, 0,
+ "(unique=%" PRIu64
+--
+1.8.3.1
+
diff --git a/SOURCES/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch b/SOURCES/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
new file mode 100644
index 0000000..e1ea6d0
--- /dev/null
+++ b/SOURCES/0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
@@ -0,0 +1,47 @@
+From d7c0dc7107a024d28196a4582bacf28ddcfbeb69 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Tue, 14 Apr 2020 07:59:22 -0400
+Subject: [PATCH 363/367] common-ha: cluster status shows "FAILOVER" when
+ actually HEALTHY
+
+pacemaker devs changed the format of the output of `pcs status`
+
+Expected to find a line in the format:
+
+Online: ....
+
+but now it's
+
+ * Online: ...
+
+And the `grep -E "^Online:"` no longer finds the list of nodes that
+are online.
+
+ https://review.gluster.org/#/c/glusterfs/+/24333/
+
+Change-Id: If2aa1e7b53c766c625d7b4cc222a83ea2c0bd72d
+BUG: 1823706
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/197367
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 0b0050a..df333a1 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -935,7 +935,7 @@ status()
+ done
+
+ # print the nodes that are expected to be online
+- grep -E "^Online:" ${scratch}
++ grep -E "Online:" ${scratch}
+
+ echo
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch b/SOURCES/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch
new file mode 100644
index 0000000..2c6ba98
--- /dev/null
+++ b/SOURCES/0364-dht-fixing-rebalance-failures-for-files-with-holes.patch
@@ -0,0 +1,97 @@
+From 5b1bfebacac649e6f5051316e4075309caf93901 Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Tue, 21 Apr 2020 19:13:41 +0300
+Subject: [PATCH 364/367] dht - fixing rebalance failures for files with holes
+
+The rebalance process's handling of files which contain holes caused
+rebalance to fail with "No space left on device" errors.
+This patch modifies the code-flow in such a way that files with holes
+will be rebalanced correctly.
+
+backport of https://review.gluster.org/#/c/glusterfs/+/24357/
+>fixes: #1187
+>Change-Id: I89bc3d4ea7f074db7213d759c49307f379543932
+>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+
+BUG: 1823703
+Change-Id: I89bc3d4ea7f074db7213d759c49307f379543932
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/198579
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index f4c62b8..7d9df02 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -650,7 +650,7 @@ out:
+ static int
+ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
+- int *fop_errno)
++ int *fop_errno, int file_has_holes)
+ {
+ int ret = -1;
+ int ret2 = -1;
+@@ -819,7 +819,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+
+ /* No need to bother about 0 byte size files */
+ if (stbuf->ia_size > 0) {
+- if (conf->use_fallocate) {
++ if (conf->use_fallocate && !file_has_holes) {
+ ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
+@@ -846,9 +846,7 @@ __dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ goto out;
+ }
+ }
+- }
+-
+- if (!conf->use_fallocate) {
++ } else {
+ ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+@@ -1728,9 +1726,13 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ goto out;
+ }
+
++ /* Try to preserve 'holes' while migrating data */
++ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
++ file_has_holes = 1;
++
+ /* create the destination, with 
required modes/xattr */ + ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd, +- fop_errno); ++ fop_errno, file_has_holes); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "Create dst failed" +@@ -1774,8 +1776,8 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + * destination. We need to do update this only post migration + * as in case of failure the linkto needs to point to the source + * subvol */ +- ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, +- &dst_fd, fop_errno); ++ ret = __dht_rebalance_create_dst_file( ++ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "Create dst failed" +@@ -1862,9 +1864,6 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + ret = 0; + goto out; + } +- /* Try to preserve 'holes' while migrating data */ +- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) +- file_has_holes = 1; + + ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd, + stbuf.ia_size, file_has_holes, +-- +1.8.3.1 + diff --git a/SOURCES/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch b/SOURCES/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch new file mode 100644 index 0000000..daf8dc6 --- /dev/null +++ b/SOURCES/0365-build-geo-rep-requires-relevant-selinux-permission-f.patch @@ -0,0 +1,70 @@ +From 36180d21dc4b16619b75d65d51eaf37df4e0e2d3 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Mon, 20 Apr 2020 12:15:42 +0100 +Subject: [PATCH 365/367] build: geo-rep requires relevant selinux permission + for rsync + +If selinux is set in enforcing mode geo-rep goes into faulty state. + +To avoid this from happening some relevant selinux booleans need to be set +in 'on' state to allow rsync operation. + +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24348. + >Change-Id: Ia8ce530d6548c2a545f4c99c600f5aac2bbb3363 + >Fixes: #1182 + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1813917 +Change-Id: Ia8ce530d6548c2a545f4c99c600f5aac2bbb3363 +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/198599 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 7c8a751..5ed07e7 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -130,6 +130,12 @@ + ## All %%global definitions should be placed here and keep them sorted + ## + ++# selinux booleans whose defalut value needs modification ++# these booleans will be consumed by "%%selinux_set_booleans" macro. 
++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++%global selinuxbooleans rsync_full_access=1 rsync_client=1 ++%endif ++ + %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) + %global _with_systemd true + %endif +@@ -515,6 +521,12 @@ Requires: python%{_pythonver}-gluster = %{version}-%{release} + Requires: rsync + Requires: util-linux + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++# required for setting selinux bools ++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++Requires: selinux-policy-targeted ++Requires(post): selinux-policy-targeted ++BuildRequires: selinux-policy-devel ++%endif + + %description geo-replication + GlusterFS is a distributed file-system capable of scaling to several +@@ -941,6 +953,9 @@ exit 0 + + %if ( 0%{!?_without_georeplication:1} ) + %post geo-replication ++%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) ++%selinux_set_booleans %{selinuxbooleans} ++%endif + if [ $1 -ge 1 ]; then + %systemd_postun_with_restart glusterd + fi +-- +1.8.3.1 + diff --git a/SOURCES/0366-snapshot-fix-python3-issue-in-gcron.patch b/SOURCES/0366-snapshot-fix-python3-issue-in-gcron.patch new file mode 100644 index 0000000..c704a17 --- /dev/null +++ b/SOURCES/0366-snapshot-fix-python3-issue-in-gcron.patch @@ -0,0 +1,55 @@ +From d7b84014cbb19e65dfae6248af47cc23fabc64e5 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Wed, 22 Apr 2020 15:09:16 +0100 +Subject: [PATCH 366/367] snapshot: fix python3 issue in gcron + +`$gcron.py test_vol Job` +Traceback: + File "/usr/sbin/gcron.py", line 189, in <module> + main() + File "/usr/sbin/gcron.py", line 121, in main + initLogger(script_name) + File "/usr/sbin/gcron.py", line 44, in initLogger + logfile = os.path.join(out.strip(), script_name[:-3]+".log") + File "/usr/lib64/python3.6/posixpath.py", line 94, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.6/genericpath.py", line 151, in _check_arg_types + raise TypeError("Can't mix strings and bytes in path components") from None +TypeError: Can't mix strings and bytes in path components + +Solution: Added the 'universal_newlines' flag to Popen. 
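+
+A minimal sketch of the fixed call as used here (output value illustrative):
+
+    process = subprocess.Popen(["gluster", "--print-logdir"],
+                               stdout=subprocess.PIPE,
+                               universal_newlines=True)
+    out, err = process.communicate()
+    # out is str on python3, so os.path.join(out.strip(), ...) works.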
+ +Backport of: + + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24364/ + >Change-Id: I4c7a0e5bce605e4c134f6786c9dd8162b89fc77f + >Fixes: #1193 + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1825195 +Change-Id: I4c7a0e5bce605e4c134f6786c9dd8162b89fc77f +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/198641 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/snap_scheduler/gcron.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py +index 1127be0..cc16310 100755 +--- a/extras/snap_scheduler/gcron.py ++++ b/extras/snap_scheduler/gcron.py +@@ -38,7 +38,8 @@ def initLogger(script_name): + sh.setFormatter(formatter) + + process = subprocess.Popen(["gluster", "--print-logdir"], +- stdout=subprocess.PIPE) ++ stdout=subprocess.PIPE, ++ universal_newlines=True) + out, err = process.communicate() + if process.returncode == 0: + logfile = os.path.join(out.strip(), script_name[:-3]+".log") +-- +1.8.3.1 + diff --git a/SOURCES/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch b/SOURCES/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch new file mode 100644 index 0000000..b94f8fc --- /dev/null +++ b/SOURCES/0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch @@ -0,0 +1,95 @@ +From aef8e51b9974603d397cc8f5301b24451d012e46 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Fri, 24 Apr 2020 13:32:51 +0530 +Subject: [PATCH 367/367] dht: Handle setxattr and rm race for directory in + rebalance + +Problem: Selfheal as part of directory does not return an error if +the layout setxattr fails. This is because the actual lookup fop +must have been successful to proceed for layout heal. Hence, we could +not tell if fix-layout failed in rebalance. + +Solution: We can check this information in the layout structure that +whether all the xlators have returned error. + +> fixes: #1200 +> hange-Id: I3e5f2a36c0d934c21476a73a9a5473d8e490cde7 +> Signed-off-by: Susant Palai <spalai@redhat.com> +(backport of https://review.gluster.org/#/c/glusterfs/+/24375/) + +BUG: 1812789 +Change-Id: I897826c4c2e883b3085c9314deff32d649b4588e +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/198726 +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 19 +++++++++++++++++++ + xlators/cluster/dht/src/dht-common.h | 3 +++ + xlators/cluster/dht/src/dht-rebalance.c | 11 +++++++++++ + 3 files changed, 33 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index d0b5287..7890e7a 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -11286,3 +11286,22 @@ dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); + return 0; + } ++ ++/* The job of this function is to check if all the xlators have updated ++ * error in the layout. 
*/
++int
++dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
++{
++ dht_layout_t *layout = NULL;
++ int i = 0;
++
++ layout = dht_layout_get(this, inode);
++ for (i = 0; i < layout->cnt; i++) {
++ if (layout->list[i].err == 0) {
++ return 0;
++ }
++ }
++
++ /* Returning the first xlator error as all xlators have errors */
++ return layout->list[0].err;
++}
+diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
+index ce11f02..4d2aae6 100644
+--- a/xlators/cluster/dht/src/dht-common.h
++++ b/xlators/cluster/dht/src/dht-common.h
+@@ -1544,4 +1544,7 @@ dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ int32_t
+ dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
++int
++dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
++
+ #endif /* _DHT_H */
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index 7d9df02..33cacfe 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -3928,6 +3928,17 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ }
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
++
++ /* In case of a race where the directory is deleted just before
++ * layout setxattr, the errors are updated in the layout structure.
++ * We can use this information to make a decision whether the directory
++ * is deleted entirely.
++ */
++ if (ret == 0) {
++ ret = dht_dir_layout_error_check(this, loc->inode);
++ ret = -ret;
++ }
++
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+--
+1.8.3.1
+
diff --git a/SOURCES/0368-Update-rfc.sh-to-rhgs-3.5.2.patch b/SOURCES/0368-Update-rfc.sh-to-rhgs-3.5.2.patch
new file mode 100644
index 0000000..c103891
--- /dev/null
+++ b/SOURCES/0368-Update-rfc.sh-to-rhgs-3.5.2.patch
@@ -0,0 +1,26 @@
+From 00b79c4e2837980f36f7d8387d90cfb7dc8d0d58 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Tue, 5 May 2020 12:41:41 -0400
+Subject: [PATCH 368/375] Update rfc.sh to rhgs-3.5.2
+
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+---
+ rfc.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/rfc.sh b/rfc.sh
+index a408e45..37d551f 100755
+--- a/rfc.sh
++++ b/rfc.sh
+@@ -18,7 +18,7 @@ done
+ shift $((OPTIND-1))
+
+
+-branch="rhgs-3.5.1-rhel-8";
++branch="rhgs-3.5.2";
+
+ set_hooks_commit_msg()
+ {
+--
+1.8.3.1
+
diff --git a/SOURCES/0369-cluster-ec-Return-correct-error-code-and-log-message.patch b/SOURCES/0369-cluster-ec-Return-correct-error-code-and-log-message.patch
new file mode 100644
index 0000000..c3c8925
--- /dev/null
+++ b/SOURCES/0369-cluster-ec-Return-correct-error-code-and-log-message.patch
@@ -0,0 +1,53 @@
+From f30fa3938f980f03d08479776037090e7fc11f42 Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Tue, 5 May 2020 18:17:49 +0530
+Subject: [PATCH 369/375] cluster/ec: Return correct error code and log message
+
+If a readdir was sent with an FD on which opendir
+had failed, this FD will be useless and we return it with an error.
+For now, we are returning it with EINVAL without logging any
+message in the log file.
+
+Return a correct error code and also log a message to make debugging easier.
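+
+Condensed, the change below distinguishes the two failure modes instead
+of returning a blanket EINVAL (sketch of the hunk that follows):
+
+    ctx = ec_fd_get(fop->fd, fop->xl);
+    if (ctx == NULL)
+        fop->error = ENOMEM;   /* fd context could not be obtained */
+    else if (ctx->open == 0)
+        fop->error = EBADFD;   /* opendir never succeeded on this fd */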
+ +>fixes: #1220 +>Change-Id: Iaf035254b9c5aa52fa43ace72d328be622b06169 +>Signed-off-by: Ashish Pandey <aspandey@redhat.com> +(Backport of https://review.gluster.org/#/c/glusterfs/+/24407/) + +BUG: 1831403 +Change-Id: Ib5bf30c47b7491abd0ad5ca0ce52ec77945b2e53 +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/200209 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-dir-read.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c +index 8310d4a..9924425 100644 +--- a/xlators/cluster/ec/src/ec-dir-read.c ++++ b/xlators/cluster/ec/src/ec-dir-read.c +@@ -388,9 +388,16 @@ ec_manager_readdir(ec_fop_data_t *fop, int32_t state) + /* Return error if opendir has not been successfully called on + * any subvolume. */ + ctx = ec_fd_get(fop->fd, fop->xl); +- if ((ctx == NULL) || (ctx->open == 0)) { +- fop->error = EINVAL; ++ if (ctx == NULL) { ++ fop->error = ENOMEM; ++ } else if (ctx->open == 0) { ++ fop->error = EBADFD; ++ } + ++ if (fop->error) { ++ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error, ++ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s", ++ ec_msg_str(fop)); + return EC_STATE_REPORT; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch b/SOURCES/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch new file mode 100644 index 0000000..6648a4e --- /dev/null +++ b/SOURCES/0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch @@ -0,0 +1,203 @@ +From 3d230880aed85737365deafe3c9a32c67da2a79e Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Mon, 4 May 2020 19:09:00 +0530 +Subject: [PATCH 370/375] dht: Do opendir selectively in gf_defrag_process_dir + +Currently opendir is done from the cluster view. Hence, even if +one opendir is successful, the opendir operation as a whole is considered +successful. + +But since in gf_defrag_get_entry we fetch entries selectively from +local_subvols, we need to opendir individually on those local subvols +and keep track of fds separately. Otherwise it is possible that opendir +failed on one of the subvol and we wind readdirp call on the fd to the +corresponding subvol, which will ultimately result in EINVAL error. 
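+
+In outline, the change makes gf_defrag_process_dir open the directory
+once per local subvol and track the fds separately (simplified sketch,
+error handling trimmed):
+
+    for (i = 0; i < local_subvols_cnt; i++) {
+        lfd[i] = fd_create(loc->inode, defrag->pid);
+        if (syncop_opendir(conf->local_subvols[i], loc, lfd[i],
+                           NULL, NULL) != 0) {
+            fd_unref(lfd[i]);
+            lfd[i] = NULL;     /* readdirp is skipped for this subvol */
+        } else {
+            fd_bind(lfd[i]);
+        }
+    }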
+ +> fixes: #1218 +> Change-Id: I50dd88b9597852a15579f4ee325918979417f570 +> Signed-off-by: Susant Palai <spalai@redhat.com> +(Backport of https://review.gluster.org/#/c/glusterfs/+/24404/) + +BUG: 1831403 +Change-Id: I96e19fdd630279c3ef44f361c1d1fc5c1c429821 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/200306 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-common.h | 2 + + xlators/cluster/dht/src/dht-rebalance.c | 74 +++++++++++++++++++++++---------- + 2 files changed, 54 insertions(+), 22 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 4d2aae6..8e65111 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -742,6 +742,8 @@ struct dir_dfmeta { + struct list_head **head; + struct list_head **iterator; + int *fetch_entries; ++ /* fds corresponding to local subvols only */ ++ fd_t **lfd; + }; + + typedef struct dht_migrate_info { +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 33cacfe..c692119 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -48,6 +48,8 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) + if (meta) { + for (i = 0; i < local_subvols_cnt; i++) { + gf_dirent_free(&meta->equeue[i]); ++ if (meta->lfd && meta->lfd[i]) ++ fd_unref(meta->lfd[i]); + } + + GF_FREE(meta->equeue); +@@ -55,6 +57,7 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) + GF_FREE(meta->iterator); + GF_FREE(meta->offset_var); + GF_FREE(meta->fetch_entries); ++ GF_FREE(meta->lfd); + GF_FREE(meta); + } + } +@@ -3095,7 +3098,7 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req, + int *should_commit_hash, int *perrno) + { +- int ret = -1; ++ int ret = 0; + char is_linkfile = 0; + gf_dirent_t *df_entry = NULL; + struct dht_container *tmp_container = NULL; +@@ -3111,6 +3114,13 @@ int static gf_defrag_get_entry(xlator_t *this, int i, + } + + if (dir_dfmeta->fetch_entries[i] == 1) { ++ if (!fd) { ++ dir_dfmeta->fetch_entries[i] = 0; ++ dir_dfmeta->offset_var[i].readdir_done = 1; ++ ret = 0; ++ goto out; ++ } ++ + ret = syncop_readdirp(conf->local_subvols[i], fd, 131072, + dir_dfmeta->offset_var[i].offset, + &(dir_dfmeta->equeue[i]), xattr_req, NULL); +@@ -3270,7 +3280,6 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + dict_t *migrate_data, int *perrno) + { + int ret = -1; +- fd_t *fd = NULL; + dht_conf_t *conf = NULL; + gf_dirent_t entries; + dict_t *xattr_req = NULL; +@@ -3304,28 +3313,49 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + goto out; + } + +- fd = fd_create(loc->inode, defrag->pid); +- if (!fd) { +- gf_log(this->name, GF_LOG_ERROR, "Failed to create fd"); ++ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer); ++ if (!dir_dfmeta) { ++ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL"); + ret = -1; + goto out; + } + +- ret = syncop_opendir(this, loc, fd, NULL, NULL); +- if (ret) { +- gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_DATA_FAILED, +- "Migrate data failed: Failed to open dir %s", loc->path); +- *perrno = -ret; ++ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *), ++ gf_common_mt_pointer); ++ if 
(!dir_dfmeta->lfd) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
++ "could not allocate memory for dir_dfmeta");
++ ret = -1;
++ *perrno = ENOMEM;
++ goto out;
++ }
++
++ for (i = 0; i < local_subvols_cnt; i++) {
++ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
++ if (!dir_dfmeta->lfd[i]) {
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "failed to create fd");
++ *perrno = ENOMEM;
++ ret = -1;
++ goto out;
++ }
++
++ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
++ NULL, NULL);
++ if (ret) {
++ fd_unref(dir_dfmeta->lfd[i]);
++ dir_dfmeta->lfd[i] = NULL;
++ gf_smsg(this->name, GF_LOG_WARNING, 0, 0,
++ "failed to open dir: %s subvol: %s", loc->path,
++ conf->local_subvols[i]->name);
++
++ if (conf->decommission_in_progress) {
++ *perrno = -ret;
++ ret = -1;
++ goto out;
++ }
++ } else {
++ fd_bind(dir_dfmeta->lfd[i]);
++ }
+ }
+
+ dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
+@@ -3360,6 +3390,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ret = -1;
+ goto out;
+ }
++
+ ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+@@ -3372,7 +3403,8 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
+ gf_common_mt_int);
+ if (!dir_dfmeta->fetch_entries) {
+- gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->fetch_entries is NULL");
++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
++ "could not allocate memory for dir_dfmeta->fetch_entries");
+ ret = -1;
+ goto out;
+ }
+@@ -3442,8 +3474,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
+ !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
+- defrag, fd, migrate_data, dir_dfmeta,
+- xattr_req, &should_commit_hash, perrno);
++ defrag, dir_dfmeta->lfd[dfc_index],
++ migrate_data, dir_dfmeta, xattr_req,
++ &should_commit_hash, perrno);
+
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+@@ -3497,9 +3530,6 @@ out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+- if (fd)
+- fd_unref(fd);
+-
+ if (ret == 0 && should_commit_hash == 0) {
+ ret = 2;
+ }
+--
+1.8.3.1
+
diff --git a/SOURCES/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch b/SOURCES/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
new file mode 100644
index 0000000..a395da3
--- /dev/null
+++ b/SOURCES/0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch
@@ -0,0 +1,53 @@
+From 05bd0226716516d37ead173c7d6924225bd474db Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Wed, 6 May 2020 07:24:38 -0400
+Subject: [PATCH 371/375] common-ha: cluster status shows "FAILOVER" when
+ actually HEALTHY
+
+pacemaker devs changed the format of the output of `pcs status`
+
+Expected to find a line in the format:
+
+ Online: ....
+
+but now it's
+
+ * Online: ...
+
+And the `grep -E "^Online:"` no longer finds the list of nodes that
+are online.
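+
+For example (node names hypothetical):
+
+    # pcs-0.9.x                      # pcs-0.10.x
+    Online: [ node1 node2 ]          * Online: [ node1 node2 ]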
+ +Also other lines now have '*' in first few characters of the line +throwing off `grep -x ...` + +https://review.gluster.org/#/c/glusterfs/+/24403/ + +Change-Id: Ia04a89e76914f2a455a755f0a93fa415f60aefd0 +BUG: 1823706 +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/199442 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index df333a1..4ecf91b 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -919,8 +919,9 @@ status() + local index=1 + local nodes + +- # change tabs to spaces, strip leading spaces +- pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch} ++ # change tabs to spaces, strip leading spaces, including any ++ # new '*' at the beginning of a line introduced in pcs-0.10.x ++ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch} + + nodes[0]=${1}; shift + +-- +1.8.3.1 + diff --git a/SOURCES/0372-posix-fix-seek-functionality.patch b/SOURCES/0372-posix-fix-seek-functionality.patch new file mode 100644 index 0000000..7c286c2 --- /dev/null +++ b/SOURCES/0372-posix-fix-seek-functionality.patch @@ -0,0 +1,49 @@ +From 955fea10809861aa9b3da85d386c2cc92b319cdb Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Thu, 7 May 2020 18:57:37 +0300 +Subject: [PATCH 372/375] posix - fix seek functionality + +A wrong pointer check causes the offset returned by seek to be always +wrong + +backport of https://review.gluster.org/#/c/glusterfs/+/24412/ +>fixes: #1228 +>Change-Id: Iac4c6a163175617ac4f14544fc6b7c6fb4041cd6 +>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> + +BUG: 1833017 +Change-Id: Iac4c6a163175617ac4f14544fc6b7c6fb4041cd6 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/199761 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/syncop.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 0de53c6..693970f 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -2881,12 +2881,13 @@ syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what, + SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset, + what, xdata_in); + +- if (*off) +- *off = args.offset; +- +- if (args.op_ret == -1) ++ if (args.op_ret < 0) { + return -args.op_errno; +- return args.op_ret; ++ } else { ++ if (off) ++ *off = args.offset; ++ return args.op_ret; ++ } + } + + int +-- +1.8.3.1 + diff --git a/SOURCES/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch b/SOURCES/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch new file mode 100644 index 0000000..7abaf0e --- /dev/null +++ b/SOURCES/0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch @@ -0,0 +1,51 @@ +From bbf43008e6d21d649536547f500662b940562c3e Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Mon, 11 May 2020 10:02:08 +0100 +Subject: [PATCH 373/375] build: geo-rep sub-pkg requires + policycoreutils-python-utils on rhel8 + +glusterfs-geo-replication sub-package requires 
policycoreutils-python-utils
+on rhel8 to set relevant selinux boolean to allow rsync.
+
+Backport of:
+ >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24433/
+ >Change-Id: Ia0fdcfdd8c7d18cd194e011f6b365bf5cb70a20a
+ >Fixes: #1236
+ >Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+
+BUG: 1825177
+Change-Id: Ia0fdcfdd8c7d18cd194e011f6b365bf5cb70a20a
+Signed-off-by: Sunny Kumar <sunkumar@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200242
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ glusterfs.spec.in | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/glusterfs.spec.in b/glusterfs.spec.in
+index 5ed07e7..9def416 100644
+--- a/glusterfs.spec.in
++++ b/glusterfs.spec.in
+@@ -523,6 +523,8 @@ Requires: util-linux
+ Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+ # required for setting selinux bools
+ %if ( 0%{?rhel} && 0%{?rhel} >= 8 )
++Requires(post): policycoreutils-python-utils
++Requires(postun): policycoreutils-python-utils
+ Requires: selinux-policy-targeted
+ Requires(post): selinux-policy-targeted
+ BuildRequires: selinux-policy-devel
+@@ -1978,6 +1980,10 @@ fi
+ %endif
+
+ %changelog
++
++* Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
++- added requires policycoreutils-python-utils on rhel8 for geo-replication
++
+ * Tue Aug 27 2019 Hari Gowtham <hgowtham@redhat.com>
+ - Added scripts to collect machine stats and component stats (#1719171)
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0374-open-behind-fix-missing-fd-reference.patch b/SOURCES/0374-open-behind-fix-missing-fd-reference.patch
new file mode 100644
index 0000000..94a1fb9
--- /dev/null
+++ b/SOURCES/0374-open-behind-fix-missing-fd-reference.patch
@@ -0,0 +1,121 @@
+From 30cbdf8c06145a0c290da42ecc0a7eae928200b7 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Sun, 8 Mar 2020 18:36:45 +0100
+Subject: [PATCH 374/375] open-behind: fix missing fd reference
+
+Open behind was not keeping any reference on fd's pending to be
+opened. This makes it possible that a concurrent close and an entry
+fop (unlink, rename, ...) caused destruction of the fd while it
+was still being used.
+
+Upstream patch:
+> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204
+> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9
+> Fixes: bz#1810934
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+
+Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9
+BUG: 1830713
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/199714
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++---------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index 268c717..14ebc12 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd)
+ if (ob_fd->xdata)
+ dict_unref(ob_fd->xdata);
+
+- if (ob_fd->open_frame)
++ if (ob_fd->open_frame) {
++ /* If we sill have a frame it means that background open has never
++ * been triggered. We need to release the pending reference. 
*/ ++ fd_unref(ob_fd->fd); ++ + STACK_DESTROY(ob_fd->open_frame->root); ++ } + + GF_FREE(ob_fd); + } +@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + ++ /* The background open is completed. We can release the 'fd' reference. */ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- frame->local = fd_ref(fd); ++ /* We don't need to take a reference here. We already have a reference ++ * while the open is pending. */ ++ frame->local = fd; + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -345,15 +353,12 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; +- fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); +- fd = ob_fd->fd; + ob_fd_free(ob_fd); +- fd_unref(fd); + } + } + } +@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = __fd_ref(src->fd); ++ dst->fd = src->fd; + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + +- /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- if (!open_in_progress && !unlinked) { +- fd_ref(fd); ++ /* We take a reference while the background open is pending or being ++ * processed. If we finally wind the request in the foreground, then ++ * ob_fd_free() will take care of this additional reference. 
*/ ++ fd_ref(fd); + ++ if (!open_in_progress && !unlinked) { + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); +- +- fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0375-features-shard-Send-correct-size-when-reads-are-sent.patch b/SOURCES/0375-features-shard-Send-correct-size-when-reads-are-sent.patch new file mode 100644 index 0000000..32f9c19 --- /dev/null +++ b/SOURCES/0375-features-shard-Send-correct-size-when-reads-are-sent.patch @@ -0,0 +1,75 @@ +From ac5b1b38e705bd0e4c00cc50580a71dfaa4d3b5f Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Wed, 7 Aug 2019 12:12:43 +0530 +Subject: [PATCH 375/375] features/shard: Send correct size when reads are sent + beyond file size + +Backport of: +> https://review.gluster.org/c/glusterfs/+/23175 +> Change-Id: I0cebaaf55c09eb1fb77a274268ff564e871b743b +> fixes bz#1738419 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Change-Id: I0cebaaf55c09eb1fb77a274268ff564e871b743b +BUG: 1802013 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/199570 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/shard/bug-1738419.t | 29 +++++++++++++++++++++++++++++ + xlators/features/shard/src/shard.c | 2 ++ + 2 files changed, 31 insertions(+) + create mode 100644 tests/bugs/shard/bug-1738419.t + +diff --git a/tests/bugs/shard/bug-1738419.t b/tests/bugs/shard/bug-1738419.t +new file mode 100644 +index 0000000..8d0a31d +--- /dev/null ++++ b/tests/bugs/shard/bug-1738419.t +@@ -0,0 +1,29 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 network.remote-dio off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.strict-o-direct on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST dd if=/dev/zero of=$M0/metadata bs=501 count=1 ++ ++EXPECT "501" echo $("dd" if=$M0/metadata bs=4096 count=1 of=/dev/null iflag=direct 2>&1 | awk '/bytes/ {print $1}') ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index b224abd..9ed597b 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -4433,6 +4433,8 @@ out: + if (xdata) + local->xattr_rsp = dict_ref(xdata); + vec.iov_base = local->iobuf->ptr; ++ if (local->offset + local->req_size > local->prebuf.ia_size) ++ local->total_size = local->prebuf.ia_size - local->offset; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, +-- +1.8.3.1 + diff --git a/SOURCES/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch b/SOURCES/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch new file mode 100644 index 0000000..b295fc2 --- /dev/null +++ b/SOURCES/0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch @@ -0,0 +1,70 @@ +From 341d75642ecc4e27bc6fecb56eb98a0ba03d8544 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Mon, 23 Mar 2020 11:47:10 +0530 +Subject: [PATCH 376/379] features/shard: Fix crash during shards cleanup in + error cases + +Backport of: +> https://review.gluster.org/c/glusterfs/+/24244 +> Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5 +> Fixes: #1127 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +A crash is seen during a reattempt to clean up shards in background +upon remount. And this happens even on remount (which means a remount +is no workaround for the crash). + +In such a situation, the in-memory base inode object will not be +existent (new process, non-existent base shard). +So local->resolver_base_inode will be NULL. + +In the event of an error (in this case, of space running out), the +process would crash at the time of logging the error in the following line - + + gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, + "failed to delete shards of %s", + uuid_utoa(local->resolver_base_inode->gfid)); + +Fixed that by using local->base_gfid as the source of gfid when +local->resolver_base_inode is NULL. 
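+
+Condensed, the fix below picks the gfid from whichever source exists
+(sketch of the hunk that follows):
+
+    uuid_t gfid = {0,};
+
+    if (local->resolver_base_inode)
+        gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+    else
+        gf_uuid_copy(gfid, local->base_gfid);   /* no in-memory inode */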
+
+Change-Id: I0b49f2b58becd0d8874b3d4b14ff8d92a89d02d5
+BUG: 1836233
+Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/200689
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/features/shard/src/shard.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
+index 9ed597b..ee38ed2 100644
+--- a/xlators/features/shard/src/shard.c
++++ b/xlators/features/shard/src/shard.c
+@@ -2729,13 +2729,20 @@ int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+ int shard_post_lookup_shards_unlink_handler(call_frame_t *frame,
+ xlator_t *this) {
+ shard_local_t *local = NULL;
++ uuid_t gfid = {
++ 0,
++ };
+
+ local = frame->local;
+
++ if (local->resolver_base_inode)
++ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
++ else
++ gf_uuid_copy(gfid, local->base_gfid);
++
+ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+- "failed to delete shards of %s",
+- uuid_utoa(local->resolver_base_inode->gfid));
++ "failed to delete shards of %s", uuid_utoa(gfid));
+ return 0;
+ }
+ local->op_ret = 0;
+--
+1.8.3.1
+
diff --git a/SOURCES/0377-syncop-improve-scaling-and-implement-more-tools.patch b/SOURCES/0377-syncop-improve-scaling-and-implement-more-tools.patch
new file mode 100644
index 0000000..66cccc3
--- /dev/null
+++ b/SOURCES/0377-syncop-improve-scaling-and-implement-more-tools.patch
@@ -0,0 +1,862 @@
+From 66600fb55522d405a68d7340a5680a2633c4237e Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Thu, 30 Apr 2020 11:19:01 +0200
+Subject: [PATCH 377/379] syncop: improve scaling and implement more tools
+
+The current scaling of the syncop thread pool is not working properly
+and can leave some tasks in the run queue for more time than necessary
+when the maximum number of threads is not reached.
+
+This patch provides a better scaling condition to react faster to
+pending work.
+
+Condition variables and sleep in the context of a synctask have also
+been implemented. Their purpose is to replace regular condition
+variables and sleeps that block synctask threads and prevent other
+tasks from being executed.
+
+The new features have been applied to several places in glusterd.
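+
+A hypothetical usage sketch of the new primitives from inside a synctask
+(declarations are in the syncop.h hunk below; 'ready' is illustrative):
+
+    static synclock_t lock;
+    static synccond_t cond;
+
+    /* waiter: yields the synctask instead of blocking a syncproc thread */
+    synclock_lock(&lock);
+    while (!ready)
+        synccond_wait(&cond, &lock);
+    synclock_unlock(&lock);
+
+    /* signaller */
+    synclock_lock(&lock);
+    ready = _gf_true;
+    synccond_signal(&cond);
+    synclock_unlock(&lock);
+
+    /* sleep without pinning a thread */
+    synctask_sleep(5);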
+ +upstream patch: https://review.gluster.org/#/c/glusterfs/+/24396/ + +> Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf +> Fixes: #1116 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: Ic50b7c73c104f9e41f08101a357d30b95efccfbf +BUG: 1810516 +Signed-off-by: Sanju Rakonde <srakonde@redhta.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/200409 +Tested-by: Sanju Rakonde <srakonde@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + libglusterfs/src/glusterfs/syncop.h | 52 +++- + libglusterfs/src/libglusterfs.sym | 7 + + libglusterfs/src/syncop.c | 306 ++++++++++++++++----- + xlators/cluster/dht/src/dht-rebalance.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 9 +- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 +- + .../mgmt/glusterd/src/glusterd-snapshot-utils.c | 5 +- + xlators/mgmt/glusterd/src/glusterd-syncop.h | 2 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 29 +- + xlators/mgmt/glusterd/src/glusterd.c | 2 + + xlators/mgmt/glusterd/src/glusterd.h | 2 + + 11 files changed, 317 insertions(+), 101 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h +index e0f1017..3011b4c 100644 +--- a/libglusterfs/src/glusterfs/syncop.h ++++ b/libglusterfs/src/glusterfs/syncop.h +@@ -15,6 +15,7 @@ + #include <sys/time.h> + #include <pthread.h> + #include <ucontext.h> ++#include "glusterfs/timer.h" + + #define SYNCENV_PROC_MAX 16 + #define SYNCENV_PROC_MIN 2 +@@ -32,6 +33,7 @@ + struct synctask; + struct syncproc; + struct syncenv; ++struct synccond; + + typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque); + +@@ -55,9 +57,12 @@ struct synctask { + call_frame_t *opframe; + synctask_cbk_t synccbk; + synctask_fn_t syncfn; +- synctask_state_t state; ++ struct timespec *delta; ++ gf_timer_t *timer; ++ struct synccond *synccond; + void *opaque; + void *stack; ++ synctask_state_t state; + int woken; + int slept; + int ret; +@@ -85,19 +90,21 @@ struct syncproc { + /* hosts the scheduler thread and framework for executing synctasks */ + struct syncenv { + struct syncproc proc[SYNCENV_PROC_MAX]; +- int procs; ++ ++ pthread_mutex_t mutex; ++ pthread_cond_t cond; + + struct list_head runq; +- int runcount; + struct list_head waitq; +- int waitcount; ++ ++ int procs; ++ int procs_idle; ++ ++ int runcount; + + int procmin; + int procmax; + +- pthread_mutex_t mutex; +- pthread_cond_t cond; +- + size_t stacksize; + + int destroy; /* FLAG to mark syncenv is in destroy mode +@@ -123,6 +130,13 @@ struct synclock { + }; + typedef struct synclock synclock_t; + ++struct synccond { ++ pthread_mutex_t pmutex; ++ pthread_cond_t pcond; ++ struct list_head waitq; ++}; ++typedef struct synccond synccond_t; ++ + struct syncbarrier { + gf_boolean_t initialized; /*Set on successful initialization*/ + pthread_mutex_t guard; /* guard the remaining members, pair @cond */ +@@ -219,7 +233,7 @@ struct syncopctx { + #define __yield(args) \ + do { \ + if (args->task) { \ +- synctask_yield(args->task); \ ++ synctask_yield(args->task, NULL); \ + } else { \ + pthread_mutex_lock(&args->mutex); \ + { \ +@@ -307,7 +321,9 @@ synctask_join(struct synctask *task); + void + synctask_wake(struct synctask *task); + void +-synctask_yield(struct synctask *task); ++synctask_yield(struct synctask *task, struct timespec *delta); ++void ++synctask_sleep(int32_t secs); + void + synctask_waitfor(struct synctask *task, int count); + +@@ -405,6 +421,24 @@ 
synclock_trylock(synclock_t *lock); + int + synclock_unlock(synclock_t *lock); + ++int32_t ++synccond_init(synccond_t *cond); ++ ++void ++synccond_destroy(synccond_t *cond); ++ ++int ++synccond_wait(synccond_t *cond, synclock_t *lock); ++ ++int ++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta); ++ ++void ++synccond_signal(synccond_t *cond); ++ ++void ++synccond_broadcast(synccond_t *cond); ++ + int + syncbarrier_init(syncbarrier_t *barrier); + int +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 467a1b7..5a721e0 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -938,6 +938,12 @@ syncbarrier_destroy + syncbarrier_init + syncbarrier_wait + syncbarrier_wake ++synccond_init ++synccond_destroy ++synccond_wait ++synccond_timedwait ++synccond_signal ++synccond_broadcast + syncenv_destroy + syncenv_new + synclock_destroy +@@ -1015,6 +1021,7 @@ synctask_new + synctask_new1 + synctask_set + synctask_setid ++synctask_sleep + synctask_wake + synctask_yield + sys_access +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 693970f..71d37b7 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -154,10 +154,14 @@ out: + return ret; + } + ++void * ++syncenv_processor(void *thdata); ++ + static void + __run(struct synctask *task) + { + struct syncenv *env = NULL; ++ int32_t total, ret, i; + + env = task->env; + +@@ -173,7 +177,6 @@ __run(struct synctask *task) + env->runcount--; + break; + case SYNCTASK_WAIT: +- env->waitcount--; + break; + case SYNCTASK_DONE: + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, +@@ -187,8 +190,27 @@ __run(struct synctask *task) + } + + list_add_tail(&task->all_tasks, &env->runq); +- env->runcount++; + task->state = SYNCTASK_RUN; ++ ++ env->runcount++; ++ ++ total = env->procs + env->runcount - env->procs_idle; ++ if (total > env->procmax) { ++ total = env->procmax; ++ } ++ if (total > env->procs) { ++ for (i = 0; i < env->procmax; i++) { ++ if (env->proc[i].env == NULL) { ++ env->proc[i].env = env; ++ ret = gf_thread_create(&env->proc[i].processor, NULL, ++ syncenv_processor, &env->proc[i], ++ "sproc%d", i); ++ if ((ret < 0) || (++env->procs >= total)) { ++ break; ++ } ++ } ++ } ++ } + } + + static void +@@ -210,7 +232,6 @@ __wait(struct synctask *task) + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK, + "re-waiting already waiting " + "task"); +- env->waitcount--; + break; + case SYNCTASK_DONE: + gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK, +@@ -223,12 +244,11 @@ __wait(struct synctask *task) + } + + list_add_tail(&task->all_tasks, &env->waitq); +- env->waitcount++; + task->state = SYNCTASK_WAIT; + } + + void +-synctask_yield(struct synctask *task) ++synctask_yield(struct synctask *task, struct timespec *delta) + { + xlator_t *oldTHIS = THIS; + +@@ -237,6 +257,8 @@ synctask_yield(struct synctask *task) + task->proc->sched.uc_flags &= ~_UC_TLSBASE; + #endif + ++ task->delta = delta; ++ + if (task->state != SYNCTASK_DONE) { + task->state = SYNCTASK_SUSPEND; + } +@@ -249,6 +271,35 @@ synctask_yield(struct synctask *task) + } + + void ++synctask_sleep(int32_t secs) ++{ ++ struct timespec delta; ++ struct synctask *task; ++ ++ task = synctask_get(); ++ ++ if (task == NULL) { ++ sleep(secs); ++ } else { ++ delta.tv_sec = secs; ++ delta.tv_nsec = 0; ++ ++ synctask_yield(task, &delta); ++ } ++} ++ ++static void ++__synctask_wake(struct synctask *task) ++{ ++ task->woken = 
1; ++ ++ if (task->slept) ++ __run(task); ++ ++ pthread_cond_broadcast(&task->env->cond); ++} ++ ++void + synctask_wake(struct synctask *task) + { + struct syncenv *env = NULL; +@@ -257,13 +308,18 @@ synctask_wake(struct synctask *task) + + pthread_mutex_lock(&env->mutex); + { +- task->woken = 1; ++ if (task->timer != NULL) { ++ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) { ++ goto unlock; ++ } + +- if (task->slept) +- __run(task); ++ task->timer = NULL; ++ task->synccond = NULL; ++ } + +- pthread_cond_broadcast(&env->cond); ++ __synctask_wake(task); + } ++unlock: + pthread_mutex_unlock(&env->mutex); + } + +@@ -282,7 +338,7 @@ synctask_wrap(void) + + task->state = SYNCTASK_DONE; + +- synctask_yield(task); ++ synctask_yield(task, NULL); + } + + void +@@ -422,11 +478,6 @@ synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn, + } + + synctask_wake(newtask); +- /* +- * Make sure someone's there to execute anything we just put on the +- * run queue. +- */ +- syncenv_scale(env); + + return newtask; + err: +@@ -520,8 +571,12 @@ syncenv_task(struct syncproc *proc) + goto unlock; + } + ++ env->procs_idle++; ++ + sleep_till.tv_sec = time(NULL) + SYNCPROC_IDLE_TIME; + ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till); ++ ++ env->procs_idle--; + } + + task = list_entry(env->runq.next, struct synctask, all_tasks); +@@ -540,6 +595,34 @@ unlock: + return task; + } + ++static void ++synctask_timer(void *data) ++{ ++ struct synctask *task = data; ++ struct synccond *cond; ++ ++ cond = task->synccond; ++ if (cond != NULL) { ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_del_init(&task->waitq); ++ task->synccond = NULL; ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ task->ret = -ETIMEDOUT; ++ } ++ ++ pthread_mutex_lock(&task->env->mutex); ++ ++ gf_timer_call_cancel(task->xl->ctx, task->timer); ++ task->timer = NULL; ++ ++ __synctask_wake(task); ++ ++ pthread_mutex_unlock(&task->env->mutex); ++} ++ + void + synctask_switchto(struct synctask *task) + { +@@ -572,7 +655,14 @@ synctask_switchto(struct synctask *task) + } else { + task->slept = 1; + __wait(task); ++ ++ if (task->delta != NULL) { ++ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta, ++ synctask_timer, task); ++ } + } ++ ++ task->delta = NULL; + } + pthread_mutex_unlock(&env->mutex); + } +@@ -580,65 +670,18 @@ synctask_switchto(struct synctask *task) + void * + syncenv_processor(void *thdata) + { +- struct syncenv *env = NULL; + struct syncproc *proc = NULL; + struct synctask *task = NULL; + + proc = thdata; +- env = proc->env; +- +- for (;;) { +- task = syncenv_task(proc); +- if (!task) +- break; + ++ while ((task = syncenv_task(proc)) != NULL) { + synctask_switchto(task); +- +- syncenv_scale(env); + } + + return NULL; + } + +-void +-syncenv_scale(struct syncenv *env) +-{ +- int diff = 0; +- int scale = 0; +- int i = 0; +- int ret = 0; +- +- pthread_mutex_lock(&env->mutex); +- { +- if (env->procs > env->runcount) +- goto unlock; +- +- scale = env->runcount; +- if (scale > env->procmax) +- scale = env->procmax; +- if (scale > env->procs) +- diff = scale - env->procs; +- while (diff) { +- diff--; +- for (; (i < env->procmax); i++) { +- if (env->proc[i].processor == 0) +- break; +- } +- +- env->proc[i].env = env; +- ret = gf_thread_create(&env->proc[i].processor, NULL, +- syncenv_processor, &env->proc[i], +- "sproc%03hx", env->procs & 0x3ff); +- if (ret) +- break; +- env->procs++; +- i++; +- } +- } +-unlock: +- pthread_mutex_unlock(&env->mutex); +-} +- + /* The syncenv threads are 
cleaned up in this routine. + */ + void +@@ -715,12 +758,13 @@ syncenv_new(size_t stacksize, int procmin, int procmax) + newenv->stacksize = stacksize; + newenv->procmin = procmin; + newenv->procmax = procmax; ++ newenv->procs_idle = 0; + + for (i = 0; i < newenv->procmin; i++) { + newenv->proc[i].env = newenv; + ret = gf_thread_create(&newenv->proc[i].processor, NULL, + syncenv_processor, &newenv->proc[i], "sproc%d", +- newenv->procs); ++ i); + if (ret) + break; + newenv->procs++; +@@ -810,7 +854,7 @@ __synclock_lock(struct synclock *lock) + task->woken = 0; + list_add_tail(&task->waitq, &lock->waitq); + pthread_mutex_unlock(&lock->guard); +- synctask_yield(task); ++ synctask_yield(task, NULL); + /* task is removed from waitq in unlock, + * under lock->guard.*/ + pthread_mutex_lock(&lock->guard); +@@ -963,6 +1007,136 @@ synclock_unlock(synclock_t *lock) + return ret; + } + ++/* Condition variables */ ++ ++int32_t ++synccond_init(synccond_t *cond) ++{ ++ int32_t ret; ++ ++ INIT_LIST_HEAD(&cond->waitq); ++ ++ ret = pthread_mutex_init(&cond->pmutex, NULL); ++ if (ret != 0) { ++ return -ret; ++ } ++ ++ ret = pthread_cond_init(&cond->pcond, NULL); ++ if (ret != 0) { ++ pthread_mutex_destroy(&cond->pmutex); ++ } ++ ++ return -ret; ++} ++ ++void ++synccond_destroy(synccond_t *cond) ++{ ++ pthread_cond_destroy(&cond->pcond); ++ pthread_mutex_destroy(&cond->pmutex); ++} ++ ++int ++synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta) ++{ ++ struct timespec now; ++ struct synctask *task = NULL; ++ int ret; ++ ++ task = synctask_get(); ++ ++ if (task == NULL) { ++ if (delta != NULL) { ++ timespec_now_realtime(&now); ++ timespec_adjust_delta(&now, *delta); ++ } ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ if (delta == NULL) { ++ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex); ++ } else { ++ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now); ++ } ++ } else { ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_add_tail(&task->waitq, &cond->waitq); ++ task->synccond = cond; ++ ++ ret = synclock_unlock(lock); ++ if (ret == 0) { ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ synctask_yield(task, delta); ++ ++ ret = synclock_lock(lock); ++ if (ret == 0) { ++ ret = task->ret; ++ } ++ task->ret = 0; ++ ++ return ret; ++ } ++ ++ list_del_init(&task->waitq); ++ } ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ return ret; ++} ++ ++int ++synccond_wait(synccond_t *cond, synclock_t *lock) ++{ ++ return synccond_timedwait(cond, lock, NULL); ++} ++ ++void ++synccond_signal(synccond_t *cond) ++{ ++ struct synctask *task; ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ if (!list_empty(&cond->waitq)) { ++ task = list_first_entry(&cond->waitq, struct synctask, waitq); ++ list_del_init(&task->waitq); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ synctask_wake(task); ++ } else { ++ pthread_cond_signal(&cond->pcond); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ } ++} ++ ++void ++synccond_broadcast(synccond_t *cond) ++{ ++ struct list_head list; ++ struct synctask *task; ++ ++ INIT_LIST_HEAD(&list); ++ ++ pthread_mutex_lock(&cond->pmutex); ++ ++ list_splice_init(&cond->waitq, &list); ++ pthread_cond_broadcast(&cond->pcond); ++ ++ pthread_mutex_unlock(&cond->pmutex); ++ ++ while (!list_empty(&list)) { ++ task = list_first_entry(&list, struct synctask, waitq); ++ list_del_init(&task->waitq); ++ ++ synctask_wake(task); ++ } ++} ++ + /* Barriers */ + + int +@@ -1032,7 +1206,7 @@ __syncbarrier_wait(struct syncbarrier *barrier, int waitfor) + /* called within a synctask */ + 
list_add_tail(&task->waitq, &barrier->waitq); + pthread_mutex_unlock(&barrier->guard); +- synctask_yield(task); ++ synctask_yield(task, NULL); + pthread_mutex_lock(&barrier->guard); + } else { + /* called by a non-synctask */ +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index c692119..957deaa 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -5224,7 +5224,7 @@ gf_defrag_pause_tier(xlator_t *this, gf_defrag_info_t *defrag) + defrag->tier_conf.pause_timer = gf_timer_call_after( + this->ctx, delta, gf_defrag_pause_tier_timeout, this); + +- synctask_yield(defrag->tier_conf.pause_synctask); ++ synctask_yield(defrag->tier_conf.pause_synctask, NULL); + + if (gf_defrag_get_pause_state(&defrag->tier_conf) == TIER_PAUSED) + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 0d29de2..6475611 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -6076,13 +6076,8 @@ glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr, + static void + glusterd_wait_for_blockers(glusterd_conf_t *priv) + { +- uint64_t blockers = GF_ATOMIC_GET(priv->blockers); +- +- while (blockers) { +- synclock_unlock(&priv->big_lock); +- sleep(1); +- blockers = GF_ATOMIC_GET(priv->blockers); +- synclock_lock(&priv->big_lock); ++ while (GF_ATOMIC_GET(priv->blockers)) { ++ synccond_wait(&priv->cond_blockers, &priv->big_lock); + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index 36018a0..f55a5fd 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -112,7 +112,7 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) + goto out; + + synclock_unlock(&conf->big_lock); +- sleep(1); ++ synctask_sleep(1); + synclock_lock(&conf->big_lock); + if (gf_is_service_running(proc->pidfile, &pid)) { + ret = kill(pid, SIGKILL); +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +index d225854..386eed2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +@@ -1961,9 +1961,7 @@ glusterd_update_snaps_synctask(void *opaque) + synclock_lock(&conf->big_lock); + + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + +@@ -2070,6 +2068,7 @@ out: + if (dict) + dict_unref(dict); + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h +index ce4a940..a265f21 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h +@@ -32,7 +32,7 @@ + ret = gd_syncop_submit_request(rpc, req, stb, cookie, prog, procnum, \ + cbk, (xdrproc_t)xdrproc); \ + if (!ret) \ +- synctask_yield(stb->task); \ ++ synctask_yield(stb->task, NULL); \ + else \ + gf_asprintf(&stb->errstr, \ + "%s failed. 
Check log file" \ +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 812c698..ce9931c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5068,22 +5068,22 @@ glusterd_import_friend_volumes_synctask(void *opaque) + * restarted (refer glusterd_restart_bricks ()) + */ + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + + while (i <= count) { + ret = glusterd_import_friend_volume(peer_data, i); + if (ret) { +- conf->restart_bricks = _gf_false; +- goto out; ++ break; + } + i++; + } +- glusterd_svcs_manager(NULL); ++ if (i > count) { ++ glusterd_svcs_manager(NULL); ++ } + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + out: + if (peer_data) + dict_unref(peer_data); +@@ -5769,7 +5769,9 @@ my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame) + call_frame_t *frame = v_frame; + glusterd_conf_t *conf = frame->this->private; + +- GF_ATOMIC_DEC(conf->blockers); ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + + STACK_DESTROY(frame->root); + return 0; +@@ -5865,7 +5867,9 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, + } + } + out: +- GF_ATOMIC_DEC(conf->blockers); ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + STACK_DESTROY(frame->root); + return 0; + } +@@ -6053,7 +6057,7 @@ attach_brick(xlator_t *this, glusterd_brickinfo_t *brickinfo, + * TBD: see if there's a better way + */ + synclock_unlock(&conf->big_lock); +- sleep(1); ++ synctask_sleep(1); + synclock_lock(&conf->big_lock); + } + +@@ -6193,7 +6197,7 @@ find_compat_brick_in_vol(glusterd_conf_t *conf, + "brick %s is still" + " starting, waiting for 2 seconds ", + other_brick->path); +- sleep(2); ++ synctask_sleep(2); + synclock_lock(&conf->big_lock); + retries--; + } +@@ -6680,9 +6684,7 @@ glusterd_restart_bricks(void *opaque) + * glusterd_compare_friend_data ()) + */ + while (conf->restart_bricks) { +- synclock_unlock(&conf->big_lock); +- sleep(2); +- synclock_lock(&conf->big_lock); ++ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock); + } + conf->restart_bricks = _gf_true; + +@@ -6798,6 +6800,7 @@ out: + GF_ATOMIC_DEC(conf->blockers); + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; ++ synccond_broadcast(&conf->cond_restart_bricks); + + return_block: + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c +index d360312..a01034a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.c ++++ b/xlators/mgmt/glusterd/src/glusterd.c +@@ -1845,6 +1845,8 @@ init(xlator_t *this) + (void)strncpy(conf->rundir, rundir, sizeof(conf->rundir)); + + synclock_init(&conf->big_lock, SYNC_LOCK_RECURSIVE); ++ synccond_init(&conf->cond_restart_bricks); ++ synccond_init(&conf->cond_blockers); + pthread_mutex_init(&conf->xprt_lock, NULL); + INIT_LIST_HEAD(&conf->xprt_list); + pthread_mutex_init(&conf->import_volumes, NULL); +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 2be005c..1c6c3b1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -209,6 +209,8 @@ typedef struct { + dict_t *opts; + synclock_t big_lock; + gf_boolean_t 
restart_done; ++ synccond_t cond_restart_bricks; ++ synccond_t cond_blockers; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint32_t max_port; +-- +1.8.3.1 + diff --git a/SOURCES/0378-Revert-open-behind-fix-missing-fd-reference.patch b/SOURCES/0378-Revert-open-behind-fix-missing-fd-reference.patch new file mode 100644 index 0000000..e228be2 --- /dev/null +++ b/SOURCES/0378-Revert-open-behind-fix-missing-fd-reference.patch @@ -0,0 +1,120 @@ +From d79660ccc65f163e0d9cf91cc13a199bec04d5f1 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez Juan <xhernandez@redhat.com> +Date: Wed, 20 May 2020 12:55:43 +0000 +Subject: [PATCH 378/379] Revert "open-behind: fix missing fd reference" + +This reverts commit 30cbdf8c06145a0c290da42ecc0a7eae928200b7. + +The patch is not complete because there have been some crash reports +upstream recently after the patch was released. A new patch that should +cover all corner cases is under review (), but it's a big change and it +could be risky to backport it without enough testing. + +Since there exists a workaround to avoid the problem (disable +open-behind), for now we revert the patch. + +Change-Id: I9cfc55623c33758cf5530b18f03c0d795b0f650b +BUG: 1830713 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/200952 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/open-behind/src/open-behind.c | 27 +++++++++-------------- + 1 file changed, 11 insertions(+), 16 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 14ebc12..268c717 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -206,13 +206,8 @@ ob_fd_free(ob_fd_t *ob_fd) + if (ob_fd->xdata) + dict_unref(ob_fd->xdata); + +- if (ob_fd->open_frame) { +- /* If we sill have a frame it means that background open has never +- * been triggered. We need to release the pending reference. */ +- fd_unref(ob_fd->fd); +- ++ if (ob_fd->open_frame) + STACK_DESTROY(ob_fd->open_frame->root); +- } + + GF_FREE(ob_fd); + } +@@ -302,7 +297,6 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + +- /* The background open is completed. We can release the 'fd' reference. */ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -337,9 +331,7 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- /* We don't need to take a reference here. We already have a reference +- * while the open is pending. 
*/ +- frame->local = fd; ++ frame->local = fd_ref(fd); + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -353,12 +345,15 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; ++ fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); ++ fd = ob_fd->fd; + ob_fd_free(ob_fd); ++ fd_unref(fd); + } + } + } +@@ -370,7 +365,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = src->fd; ++ dst->fd = __fd_ref(src->fd); + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -514,6 +509,7 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + ++ /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -543,16 +539,15 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- /* We take a reference while the background open is pending or being +- * processed. If we finally wind the request in the foreground, then +- * ob_fd_free() will take care of this additional reference. */ +- fd_ref(fd); +- + if (!open_in_progress && !unlinked) { ++ fd_ref(fd); ++ + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); ++ ++ fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0379-glusterd-add-missing-synccond_broadcast.patch b/SOURCES/0379-glusterd-add-missing-synccond_broadcast.patch new file mode 100644 index 0000000..cd51c6d --- /dev/null +++ b/SOURCES/0379-glusterd-add-missing-synccond_broadcast.patch @@ -0,0 +1,45 @@ +From e06882a7fea9720a2899f7d52d5d3866ff098866 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Thu, 21 May 2020 08:26:11 +0200 +Subject: [PATCH 379/379] glusterd: add missing synccond_broadcast() + +After the changes in commit 3da22f8cb08b05562a4c6bd2694f2f19199cff7f, +there was a place where synccond_broadcast() was missing. It could +cause a hang if another synctask was waiting on the condition variable. 
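
In sketch form (a hypothetical fragment condensing the glusterd-utils.c hunk
below), the rule this patch restores is that whichever path drops the last
blocker must wake the parked synctasks:

    /* Sketch: every path decrementing conf->blockers must broadcast
     * once the count reaches zero, or a waiter in
     * glusterd_wait_for_blockers() can hang forever. */
    if (GF_ATOMIC_DEC(conf->blockers) == 0) {
        synccond_broadcast(&conf->cond_blockers);
    }
    synccond_broadcast(&conf->cond_restart_bricks);
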
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24476 +> Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204 +> Fixes: #1116 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: I92bfe4e15c5c3591e4854a64aa9e1566d50dd204 +BUG: 1810516 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/201057 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ce9931c..c92cdf3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -6797,9 +6797,11 @@ glusterd_restart_bricks(void *opaque) + ret = 0; + + out: +- GF_ATOMIC_DEC(conf->blockers); + conf->restart_done = _gf_true; + conf->restart_bricks = _gf_false; ++ if (GF_ATOMIC_DEC(conf->blockers) == 0) { ++ synccond_broadcast(&conf->cond_blockers); ++ } + synccond_broadcast(&conf->cond_restart_bricks); + + return_block: +-- +1.8.3.1 + diff --git a/SOURCES/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch b/SOURCES/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch new file mode 100644 index 0000000..05915d9 --- /dev/null +++ b/SOURCES/0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch @@ -0,0 +1,306 @@ +From 2cf22e54c8424949607c4a20df84887b838b2702 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Fri, 15 May 2020 11:29:36 +0530 +Subject: [PATCH 380/382] features/shard: Aggregate size, block-count in iatt + before unwinding setxattr + +Backport of: +> Upstream patch - https://review.gluster.org/c/glusterfs/+/24471 +> Fixes: #1243 +> Change-Id: I4da0eceb4235b91546df79270bcc0af8cd64e9ea +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Posix translator returns pre and postbufs in the dict in {F}SETXATTR fops. +These iatts are further cached at layers like md-cache. +Shard translator, in its current state, simply returns these values without +updating the aggregated file size and block-count. + +This patch fixes this problem. + +Change-Id: I4da0eceb4235b91546df79270bcc0af8cd64e9ea +BUG: 1823423 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/201135 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + tests/bugs/shard/issue-1243.t | 31 ++++++ + xlators/features/shard/src/shard.c | 218 +++++++++++++++++++++++++++++++++---- + 2 files changed, 225 insertions(+), 24 deletions(-) + create mode 100644 tests/bugs/shard/issue-1243.t + +diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t +new file mode 100644 +index 0000000..b0c092c +--- /dev/null ++++ b/tests/bugs/shard/issue-1243.t +@@ -0,0 +1,31 @@ ++#!/bin/bash ++ ++. 
$(dirname $0)/../../include.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 features.shard on ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.strict-o-direct on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++TEST $CLI volume set $V0 md-cache-timeout 10 ++ ++# Write data into a file such that its size crosses shard-block-size ++TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct ++ ++# Execute a setxattr on the file. ++TEST setfattr -n trusted.libvirt -v some-value $M0/foo ++ ++# Size of the file should be the aggregated size, not the shard-block-size ++EXPECT '8388608' stat -c %s $M0/foo ++ ++cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index ee38ed2..6ae4c41 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -5929,36 +5929,206 @@ out: + return 0; + } + +-int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- dict_t *dict, int32_t flags, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, ++ xlator_t *this, int32_t op_ret, ++ int32_t op_errno, dict_t *xdata) { ++ int ret = -1; ++ struct iatt *prebuf = NULL; ++ struct iatt *postbuf = NULL; ++ struct iatt *stbuf = NULL; ++ data_t *data = NULL; ++ shard_local_t *local = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ local = frame->local; + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, +- fd, dict, flags, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); +- return 0; ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } ++ ++ if (!xdata) ++ goto unwind; ++ ++ data = dict_get(xdata, GF_PRESTAT); ++ if (data) { ++ stbuf = data_to_iatt(data, GF_PRESTAT); ++ prebuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); ++ if (prebuf == NULL) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ *prebuf = *stbuf; ++ prebuf->ia_size = local->prebuf.ia_size; ++ prebuf->ia_blocks = local->prebuf.ia_blocks; ++ ret = dict_set_iatt(xdata, GF_PRESTAT, prebuf, false); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ prebuf = NULL; ++ } ++ ++ data = dict_get(xdata, GF_POSTSTAT); ++ if (data) { ++ stbuf = data_to_iatt(data, GF_POSTSTAT); ++ postbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); ++ if (postbuf == NULL) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ *postbuf = *stbuf; ++ postbuf->ia_size = local->prebuf.ia_size; ++ postbuf->ia_blocks = local->prebuf.ia_blocks; ++ ret = dict_set_iatt(xdata, GF_POSTSTAT, postbuf, false); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ postbuf = NULL; ++ } ++ ++unwind: ++ if (local->fd) ++ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ return 0; ++ ++err: ++ GF_FREE(prebuf); ++ GF_FREE(postbuf); ++ 
shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; + } + +-int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- dict_t *dict, int32_t flags, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame, ++ xlator_t *this) { ++ shard_local_t *local = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); +- } ++ local = frame->local; + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, +- loc, dict, flags, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); +- return 0; ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ if (local->fd) ++ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, local->fd, ++ local->xattr_req, local->flags, local->xattr_rsp); ++ else ++ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, &local->loc, ++ local->xattr_req, local->flags, local->xattr_rsp); ++ return 0; ++} ++ ++int32_t shard_common_set_xattr(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, loc_t *loc, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ int ret = -1; ++ int op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ inode_t *inode = loc ? loc->inode : fd->inode; ++ ++ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, ++ xdata); ++ return 0; ++ } ++ ++ /* Sharded or not, if shard's special xattrs are attempted to be set, ++ * fail the fop with EPERM (except if the client is gsyncd. ++ */ ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err); ++ } ++ ++ ret = shard_inode_ctx_get_block_size(inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, ++ xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ local->fop = fop; ++ if (loc) { ++ if (loc_copy(&local->loc, loc) != 0) ++ goto err; ++ } ++ ++ if (fd) { ++ local->fd = fd_ref(fd); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ } ++ local->flags = flags; ++ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict ++ * and the xdata dict ++ */ ++ if (dict) ++ local->xattr_req = dict_ref(dict); ++ if (xdata) ++ local->xattr_rsp = dict_ref(xdata); ++ ++ /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is ++ * on an fd. This comes under a generic class of bugs in shard tracked by ++ * bz #1782428. 
++ */ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_set_xattr_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, ++ xdata); ++ return 0; ++} ++ ++int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ dict_t *dict, int32_t flags, dict_t *xdata) { ++ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, ++ xdata); ++ return 0; + } + + int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { +-- +1.8.3.1 + diff --git a/SOURCES/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch b/SOURCES/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch new file mode 100644 index 0000000..aa875a2 --- /dev/null +++ b/SOURCES/0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch @@ -0,0 +1,48 @@ +From 63ea2aad2474a0ca169342c699cb1689e6c1d83f Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Fri, 22 May 2020 13:49:14 +0530 +Subject: [PATCH 381/382] dht: add null check in gf_defrag_free_dir_dfmeta + +Backport of https://review.gluster.org/#/c/glusterfs/+/24479/ + +BUG:1812789 +Change-Id: I502ed43051bd60d9e5d2b69d4e4d7b6eea997285 +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/201150 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 957deaa..8f31dca 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -47,7 +47,8 @@ gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt) + + if (meta) { + for (i = 0; i < local_subvols_cnt; i++) { +- gf_dirent_free(&meta->equeue[i]); ++ if (meta->equeue) ++ gf_dirent_free(&meta->equeue[i]); + if (meta->lfd && meta->lfd[i]) + fd_unref(meta->lfd[i]); + } +@@ -3344,9 +3345,9 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + if (ret) { + fd_unref(dir_dfmeta->lfd[i]); + dir_dfmeta->lfd[i] = NULL; +- gf_smsg(this->name, GF_LOG_WARNING, 0, 0, +- "failed to open dir: %s subvol: %s", loc->path, +- conf->local_subvols[i]->name); ++ gf_msg(this->name, GF_LOG_WARNING, -ret, 0, ++ "failed to open dir: %s subvol: %s", loc->path, ++ conf->local_subvols[i]->name); + + if (conf->decommission_in_progress) { + *perrno = -ret; +-- +1.8.3.1 + diff --git a/SOURCES/0382-features-shard-Aggregate-file-size-block-count-befor.patch b/SOURCES/0382-features-shard-Aggregate-file-size-block-count-befor.patch new file mode 100644 index 0000000..a6528f5 --- /dev/null +++ b/SOURCES/0382-features-shard-Aggregate-file-size-block-count-befor.patch @@ -0,0 +1,422 @@ +From 4097a748cbb7616d78886b35e3360177d570b7a6 Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Fri, 22 May 2020 13:25:26 +0530 +Subject: [PATCH 382/382] features/shard: Aggregate file size, block-count + before unwinding removexattr + +Posix translator returns pre and postbufs in the dict in {F}REMOVEXATTR fops. 
+These iatts are further cached at layers like md-cache. +Shard translator, in its current state, simply returns these values without +updating the aggregated file size and block-count. + +This patch fixes this problem. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24480 +> Change-Id: I4b2dd41ede472c5829af80a67401ec5a6376d872 +> Fixes: #1243 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Change-Id: I4b2dd41ede472c5829af80a67401ec5a6376d872 +BUG: 1823423 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/201456 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/shard/issue-1243.t | 12 ++ + xlators/features/shard/src/shard.c | 293 ++++++++++++++++++++++++++----------- + xlators/features/shard/src/shard.h | 1 + + 3 files changed, 224 insertions(+), 82 deletions(-) + +diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t +index b0c092c..ba22d2b 100644 +--- a/tests/bugs/shard/issue-1243.t ++++ b/tests/bugs/shard/issue-1243.t +@@ -1,6 +1,7 @@ + #!/bin/bash + + . $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc + + cleanup; + +@@ -22,10 +23,21 @@ TEST $CLI volume set $V0 md-cache-timeout 10 + # Write data into a file such that its size crosses shard-block-size + TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct + ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ + # Execute a setxattr on the file. + TEST setfattr -n trusted.libvirt -v some-value $M0/foo + + # Size of the file should be the aggregated size, not the shard-block-size + EXPECT '8388608' stat -c %s $M0/foo + ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 ++ ++# Execute a removexattr on the file. 
++TEST setfattr -x trusted.libvirt $M0/foo ++ ++# Size of the file should be the aggregated size, not the shard-block-size ++EXPECT '8388608' stat -c %s $M0/foo + cleanup +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 6ae4c41..2e2ef5d 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -442,6 +442,9 @@ void shard_local_wipe(shard_local_t *local) { + loc_wipe(&local->int_entrylk.loc); + loc_wipe(&local->newloc); + ++ if (local->name) ++ GF_FREE(local->name); ++ + if (local->int_entrylk.basename) + GF_FREE(local->int_entrylk.basename); + if (local->fd) +@@ -5819,46 +5822,216 @@ int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, + return 0; + } + +-int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, +- const char *name, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t ++shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local, ++ char *key) ++{ ++ int ret = 0; ++ struct iatt *tmpbuf = NULL; ++ struct iatt *stbuf = NULL; ++ data_t *data = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ if (!xdata) ++ return 0; + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ data = dict_get(xdata, key); ++ if (!data) ++ return 0; + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); +- return 0; ++ tmpbuf = data_to_iatt(data, key); ++ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); ++ if (stbuf == NULL) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ *stbuf = *tmpbuf; ++ stbuf->ia_size = local->prebuf.ia_size; ++ stbuf->ia_blocks = local->prebuf.ia_blocks; ++ ret = dict_set_iatt(xdata, key, stbuf, false); ++ if (ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; ++ goto err; ++ } ++ return 0; ++ ++err: ++ GF_FREE(stbuf); ++ return -1; + } + +-int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, +- const char *name, dict_t *xdata) { +- int op_errno = EINVAL; ++int32_t ++shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *xdata) ++{ ++ int ret = -1; ++ shard_local_t *local = NULL; + +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { +- GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); +- } ++ local = frame->local; + +- if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { +- dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); +- dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); +- } ++ if (op_ret < 0) { ++ local->op_ret = op_ret; ++ local->op_errno = op_errno; ++ goto err; ++ } + +- STACK_WIND_TAIL(frame, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); +- return 0; +-out: +- shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); +- return 0; ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); ++ if (ret < 0) ++ goto err; ++ ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); ++ if (ret < 0) ++ goto err; ++ ++ if (local->fd) ++ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno, ++ xdata); ++ else ++ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, 
local->op_errno, ++ xdata); ++ return 0; ++ ++err: ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++} ++ ++int32_t ++shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this) ++{ ++ shard_local_t *local = NULL; ++ ++ local = frame->local; ++ ++ if (local->op_ret < 0) { ++ shard_common_failure_unwind(local->fop, frame, local->op_ret, ++ local->op_errno); ++ return 0; ++ } ++ ++ if (local->fd) ++ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, local->fd, ++ local->name, local->xattr_req); ++ else ++ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, &local->loc, ++ local->name, local->xattr_req); ++ return 0; ++} ++ ++int32_t ++shard_common_remove_xattr(call_frame_t *frame, xlator_t *this, ++ glusterfs_fop_t fop, loc_t *loc, fd_t *fd, ++ const char *name, dict_t *xdata) ++{ ++ int ret = -1; ++ int op_errno = ENOMEM; ++ uint64_t block_size = 0; ++ shard_local_t *local = NULL; ++ inode_t *inode = loc ? loc->inode : fd->inode; ++ ++ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, ++ xdata); ++ return 0; ++ } ++ ++ /* If shard's special xattrs are attempted to be removed, ++ * fail the fop with EPERM (except if the client is gsyncd). ++ */ ++ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err); ++ } ++ ++ /* Repeat the same check for bulk-removexattr */ ++ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { ++ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); ++ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); ++ } ++ ++ ret = shard_inode_ctx_get_block_size(inode, this, &block_size); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, ++ "Failed to get block size from inode ctx of %s", ++ uuid_utoa(inode->gfid)); ++ goto err; ++ } ++ ++ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { ++ if (loc) ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->removexattr, loc, name, ++ xdata); ++ else ++ STACK_WIND_TAIL(frame, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->fremovexattr, fd, name, ++ xdata); ++ return 0; ++ } ++ ++ local = mem_get0(this->local_pool); ++ if (!local) ++ goto err; ++ ++ frame->local = local; ++ local->fop = fop; ++ if (loc) { ++ if (loc_copy(&local->loc, loc) != 0) ++ goto err; ++ } ++ ++ if (fd) { ++ local->fd = fd_ref(fd); ++ local->loc.inode = inode_ref(fd->inode); ++ gf_uuid_copy(local->loc.gfid, fd->inode->gfid); ++ } ++ ++ if (name) { ++ local->name = gf_strdup(name); ++ if (!local->name) ++ goto err; ++ } ++ ++ if (xdata) ++ local->xattr_req = dict_ref(xdata); ++ ++ /* To-Do: Switch from LOOKUP which is path-based, to FSTAT if the fop is ++ * on an fd. This comes under a generic class of bugs in shard tracked by ++ * bz #1782428. 
++ */ ++ shard_lookup_base_file(frame, this, &local->loc, ++ shard_post_lookup_remove_xattr_handler); ++ return 0; ++err: ++ shard_common_failure_unwind(fop, frame, -1, op_errno); ++ return 0; ++} ++ ++int32_t ++shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, ++ const char *name, dict_t *xdata) ++{ ++ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name, ++ xdata); ++ return 0; ++} ++ ++int32_t ++shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, ++ const char *name, dict_t *xdata) ++{ ++ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name, ++ xdata); ++ return 0; + } + + int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +@@ -5933,10 +6106,6 @@ int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { + int ret = -1; +- struct iatt *prebuf = NULL; +- struct iatt *postbuf = NULL; +- struct iatt *stbuf = NULL; +- data_t *data = NULL; + shard_local_t *local = NULL; + + local = frame->local; +@@ -5947,52 +6116,14 @@ int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, + goto err; + } + +- if (!xdata) +- goto unwind; +- +- data = dict_get(xdata, GF_PRESTAT); +- if (data) { +- stbuf = data_to_iatt(data, GF_PRESTAT); +- prebuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); +- if (prebuf == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- *prebuf = *stbuf; +- prebuf->ia_size = local->prebuf.ia_size; +- prebuf->ia_blocks = local->prebuf.ia_blocks; +- ret = dict_set_iatt(xdata, GF_PRESTAT, prebuf, false); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- prebuf = NULL; +- } ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT); ++ if (ret < 0) ++ goto err; + +- data = dict_get(xdata, GF_POSTSTAT); +- if (data) { +- stbuf = data_to_iatt(data, GF_POSTSTAT); +- postbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char); +- if (postbuf == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- *postbuf = *stbuf; +- postbuf->ia_size = local->prebuf.ia_size; +- postbuf->ia_blocks = local->prebuf.ia_blocks; +- ret = dict_set_iatt(xdata, GF_POSTSTAT, postbuf, false); +- if (ret < 0) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto err; +- } +- postbuf = NULL; +- } ++ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT); ++ if (ret < 0) ++ goto err; + +-unwind: + if (local->fd) + SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno, + xdata); +@@ -6002,8 +6133,6 @@ unwind: + return 0; + + err: +- GF_FREE(prebuf); +- GF_FREE(postbuf); + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 04abd62..1721417 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -318,6 +318,7 @@ typedef struct shard_local { + uint32_t deletion_rate; + gf_boolean_t cleanup_required; + uuid_t base_gfid; ++ char *name; + } shard_local_t; + + typedef struct shard_inode_ctx { +-- +1.8.3.1 + diff --git a/SOURCES/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch b/SOURCES/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch new file mode 100644 index 0000000..3adaa65 --- /dev/null +++ b/SOURCES/0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch @@ -0,0 +1,38 @@ 
+From f880df2ce4706dd748a09d3d6db57d49f62a234c Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com> +Date: Thu, 28 May 2020 08:26:47 -0400 +Subject: [PATCH 383/383] common-ha: ganesha-ha.sh bad test for {rhel,centos} + for pcs options + +bash [[ ... =~ ... ]] built-in returns _0_ when the regex matches, +not 1, thus the sense of the test is backwards and never correctly +detects rhel or centos. + +https://review.gluster.org/#/c/glusterfs/+/24502/ + +Change-Id: Ic9e60aae4ea38aff8f13979080995e60621a68fe +BUG: 1840794 +Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/201686 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index 4ecf91b..a6814b1 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -1054,7 +1054,7 @@ main() + # Fedora 29+ and rhel/centos 8 has PCS-0.10.x + # default is pcs-0.10.x options but check for + # rhel/centos 7 (pcs-0.9.x) and adjust accordingly +- if [[ ${ID} =~ {rhel,centos} ]]; then ++ if [[ ! ${ID} =~ {rhel,centos} ]]; then + if [[ ${VERSION_ID} == 7.* ]]; then + PCS9OR10_PCS_CNAME_OPTION="--name" + PCS9OR10_PCS_CLONE_OPTION="--clone" +-- +1.8.3.1 + diff --git a/SOURCES/0384-Update-rfc.sh-to-rhgs-3.5.3.patch b/SOURCES/0384-Update-rfc.sh-to-rhgs-3.5.3.patch new file mode 100644 index 0000000..4db2222 --- /dev/null +++ b/SOURCES/0384-Update-rfc.sh-to-rhgs-3.5.3.patch @@ -0,0 +1,26 @@ +From 27dc773af276e33fcca10788fae17d131c8d9bce Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Sun, 31 May 2020 15:46:24 -0400 +Subject: [PATCH 384/449] Update rfc.sh to rhgs-3.5.3 + +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +--- + rfc.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rfc.sh b/rfc.sh +index 37d551f..1dca29f 100755 +--- a/rfc.sh ++++ b/rfc.sh +@@ -18,7 +18,7 @@ done + shift $((OPTIND-1)) + + +-branch="rhgs-3.5.2"; ++branch="rhgs-3.5.3"; + + set_hooks_commit_msg() + { +-- +1.8.3.1 + diff --git a/SOURCES/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch b/SOURCES/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch new file mode 100644 index 0000000..2b194d3 --- /dev/null +++ b/SOURCES/0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch @@ -0,0 +1,50 @@ +From 143f85f55ded7a9075408e97d05abd9568d56e7b Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Mon, 25 Nov 2019 16:35:42 +0530 +Subject: [PATCH 385/449] glusterd: start glusterd automatically on abnormal + shutdown + +If glusterd crashes or goes down abnormally, systemd should +automatically bring the glusterd up. + +With this change, systemd brings glusterd up for atmost 3 times +within time period of 1 hour. If the limit exceeds, we have to +start the glusterd manually and reset the failure count using +systemctl reset-failed. 
+ +credits: John Strunk <jstrunk@redhat.com> + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23751/ +> fixes: bz#1776264 +> Change-Id: I312d243652fb13ba028814a2ea615b67e3b10b6a +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1663557 +Change-Id: I312d243652fb13ba028814a2ea615b67e3b10b6a +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202251 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/systemd/glusterd.service.in | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in +index f604160..b944762 100644 +--- a/extras/systemd/glusterd.service.in ++++ b/extras/systemd/glusterd.service.in +@@ -15,6 +15,11 @@ ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-leve + KillMode=process + TimeoutSec=300 + SuccessExitStatus=15 ++Restart=on-abnormal ++RestartSec=60 ++StartLimitBurst=3 ++StartLimitIntervalSec=3600 ++StartLimitInterval=3600 + + [Install] + WantedBy=multi-user.target +-- +1.8.3.1 + diff --git a/SOURCES/0386-glusterd-increase-the-StartLimitBurst.patch b/SOURCES/0386-glusterd-increase-the-StartLimitBurst.patch new file mode 100644 index 0000000..ff6d0f9 --- /dev/null +++ b/SOURCES/0386-glusterd-increase-the-StartLimitBurst.patch @@ -0,0 +1,39 @@ +From 02e7afdfb740db7cfa1a2f0f79933172d172ff27 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 7 Jan 2020 15:32:13 +0530 +Subject: [PATCH 386/449] glusterd: increase the StartLimitBurst + +Based on https://bugzilla.redhat.com/show_bug.cgi?id=1782200#c6 +increasing the limit. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23970/ +> fixes: bz#1782200 +> Change-Id: Ia885c7bdb2a90f0946c5268da894f6a4da5a69b7 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1663557 +Change-Id: Ia885c7bdb2a90f0946c5268da894f6a4da5a69b7 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202252 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/systemd/glusterd.service.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in +index b944762..699aea3 100644 +--- a/extras/systemd/glusterd.service.in ++++ b/extras/systemd/glusterd.service.in +@@ -17,7 +17,7 @@ TimeoutSec=300 + SuccessExitStatus=15 + Restart=on-abnormal + RestartSec=60 +-StartLimitBurst=3 ++StartLimitBurst=6 + StartLimitIntervalSec=3600 + StartLimitInterval=3600 + +-- +1.8.3.1 + diff --git a/SOURCES/0387-To-fix-readdir-ahead-memory-leak.patch b/SOURCES/0387-To-fix-readdir-ahead-memory-leak.patch new file mode 100644 index 0000000..b685215 --- /dev/null +++ b/SOURCES/0387-To-fix-readdir-ahead-memory-leak.patch @@ -0,0 +1,47 @@ +From d54f087a2484695ff7ac214d39f2750fddcef2d5 Mon Sep 17 00:00:00 2001 +From: HuangShujun <549702281@qq.com> +Date: Thu, 5 Dec 2019 10:07:10 +0200 +Subject: [PATCH 387/449] To fix readdir-ahead memory leak + +Glusterfs client process has memory leak if create several files under +one folder, and delete the folder. According to statedump, the ref +counts of readdir-ahead is bigger than zero in the inode table. 
+ +Readdir-ahead get parent inode by inode_parent in rda_mark_inode_dirty +when each rda_writev_cbk,the inode ref count of parent folder will be +increased in inode_parent, but readdir-ahead do not unref it later. + +The correction is unref the parent inode at the end of +rda_mark_inode_dirty. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23815 +> Fixes: bz#1779055 +> Signed-off-by: HuangShujun <549702281@qq.com> +> Change-Id: Iee68ab1089cbc2fbc4185b93720fb1f66ee89524 + +BUG: 1781550 +Change-Id: Iee68ab1089cbc2fbc4185b93720fb1f66ee89524 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202312 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/readdir-ahead/src/readdir-ahead.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c +index 7fd4f8d..933941d 100644 +--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c ++++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c +@@ -254,6 +254,7 @@ rda_mark_inode_dirty(xlator_t *this, inode_t *inode) + } + } + UNLOCK(&parent->lock); ++ inode_unref(parent); + } + + return; +-- +1.8.3.1 + diff --git a/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch new file mode 100644 index 0000000..dc23ba8 --- /dev/null +++ b/SOURCES/0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch @@ -0,0 +1,142 @@ +From fbda9baaf7231e3237277348cc7e873f3113fd14 Mon Sep 17 00:00:00 2001 +From: l17zhou <cynthia.zhou@nokia-sbell.com.cn> +Date: Mon, 4 Nov 2019 08:45:52 +0200 +Subject: [PATCH 388/449] rpc: Cleanup SSL specific data at the time of freeing + rpc object + +Problem: At the time of cleanup rpc object ssl specific data + is not freeing so it has become a leak. + +Solution: To avoid the leak cleanup ssl specific data at the + time of cleanup rpc object + +> Credits: l17zhou <cynthia.zhou@nokia-sbell.com.cn> +> Fixes: bz#1768407 +> Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +> (Cherry pick from commit 54ed71dba174385ab0d8fa415e09262f6250430c) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23650/) + +Change-Id: I37f598673ae2d7a33c75f39eb8843ccc6dffaaf0 +BUG: 1786516 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202308 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 22 ++++++++++++++++++++-- + tests/features/ssl-authz.t | 23 ++++++++++++++++++++--- + 2 files changed, 40 insertions(+), 5 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 65845ea..226b2e2 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -446,6 +446,7 @@ ssl_setup_connection_postfix(rpc_transport_t *this) + gf_log(this->name, GF_LOG_DEBUG, + "SSL verification succeeded (client: %s) (server: %s)", + this->peerinfo.identifier, this->myinfo.identifier); ++ X509_free(peer); + return gf_strdup(peer_CN); + + /* Error paths. 
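
A minimal sketch of the reference-counting rule involved; this fragment is
hypothetical, and the next two paragraphs describe how the rule was violated
and fixed:

    /* inode_parent() returns a new reference on the parent inode, so
     * the caller owns it and must release it when done. */
    parent = inode_parent(inode, 0, NULL);
    if (parent != NULL) {
        /* ... use parent, e.g. mark it dirty ... */
        inode_unref(parent); /* without this, parent's ref count leaks */
    }
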
*/ +@@ -1157,7 +1158,15 @@ __socket_reset(rpc_transport_t *this) + memset(&priv->incoming, 0, sizeof(priv->incoming)); + + event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx); +- ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } + priv->sock = -1; + priv->idx = -1; + priv->connected = -1; +@@ -3217,7 +3226,6 @@ socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in, + new_priv->sock = new_sock; + + new_priv->ssl_enabled = priv->ssl_enabled; +- new_priv->ssl_ctx = priv->ssl_ctx; + new_priv->connected = 1; + new_priv->is_server = _gf_true; + +@@ -4672,6 +4680,16 @@ fini(rpc_transport_t *this) + pthread_mutex_destroy(&priv->out_lock); + pthread_mutex_destroy(&priv->cond_lock); + pthread_cond_destroy(&priv->cond); ++ if (priv->use_ssl && priv->ssl_ssl) { ++ SSL_clear(priv->ssl_ssl); ++ SSL_free(priv->ssl_ssl); ++ priv->ssl_ssl = NULL; ++ } ++ if (priv->use_ssl && priv->ssl_ctx) { ++ SSL_CTX_free(priv->ssl_ctx); ++ priv->ssl_ctx = NULL; ++ } ++ + if (priv->ssl_private_key) { + GF_FREE(priv->ssl_private_key); + } +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index cae010c..132b598 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -25,6 +25,7 @@ TEST glusterd + TEST pidof glusterd + TEST $CLI volume info; + ++TEST $CLI v set all cluster.brick-multiplex on + # Construct a cipher list that excludes CBC because of POODLE. + # http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566 + # +@@ -45,12 +46,12 @@ TEST openssl genrsa -out $SSL_KEY 2048 + TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT + ln $SSL_CERT $SSL_CA + +-TEST $CLI volume create $V0 $H0:$B0/1 ++TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force + TEST $CLI volume set $V0 server.ssl on + TEST $CLI volume set $V0 client.ssl on + TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers) + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because ssl-allow=* by default. This effectively + # disables SSL authorization, though authentication and encryption might still +@@ -59,11 +60,27 @@ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 + TEST ping_file $M0/before + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + ++glusterfsd_pid=`pgrep glusterfsd` ++TEST [ $glusterfsd_pid != 0 ] ++start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++echo "Memory consumption for glusterfsd process" ++for i in $(seq 1 100); do ++ gluster v heal $V0 info >/dev/null ++done ++ ++end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` ++diff=$((end-start)) ++ ++# If memory consumption is more than 5M some leak in SSL code path ++ ++TEST [ $diff -lt 5000 ] ++ ++ + # Set ssl-allow to a wildcard that includes our identity. + TEST $CLI volume stop $V0 + TEST $CLI volume set $V0 auth.ssl-allow Any* + TEST $CLI volume start $V0 +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count + + # This mount should SUCCEED because we match the wildcard. 
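For reference, the C-side pairing this patch enforces is the standard OpenSSL ownership rule: every per-connection SSL object and every SSL_CTX the transport allocated must be released exactly once when the transport is reset or finalized. A minimal sketch — simplified names, assuming the OpenSSL 1.x-style calls already used in socket.c, not the exact code:

#include <openssl/ssl.h>

/* Sketch: release per-connection TLS state exactly once. */
static void
transport_ssl_teardown(SSL **ssl, SSL_CTX **ctx)
{
    if (*ssl) {
        SSL_clear(*ssl); /* reset handshake state */
        SSL_free(*ssl);  /* drop the per-connection object */
        *ssl = NULL;
    }
    if (*ctx) {
        SSL_CTX_free(*ctx); /* drop this transport's context reference */
        *ctx = NULL;
    }
}

Nulling the pointers afterwards is what keeps the two teardown paths (__socket_reset() and fini()) from turning into a double free.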
+ TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 +-- +1.8.3.1 + diff --git a/SOURCES/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch b/SOURCES/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch new file mode 100644 index 0000000..7f20fb2 --- /dev/null +++ b/SOURCES/0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch @@ -0,0 +1,297 @@ +From 50318713486e79d9258cf22e656caff402256dde Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Sun, 20 Oct 2019 22:01:01 +0530 +Subject: [PATCH 389/449] posix: Avoid diskpace error in case of overwriting + the data + +Problem: Sometime fops like posix_writev, posix_fallocate, posix_zerofile + failed and throw error ENOSPC if storage.reserve threshold limit + has reached even fops is overwriting the data + +Solution: Retry the fops in case of overwrite if diskspace check + is failed + +> Credits: kinsu <vpolakis@gmail.com> +> Change-Id: I987d73bcf47ed1bb27878df40c39751296e95fe8 +> Updates: #745 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit ca3e5905ac02fb9c373ac3de10b44f061d04cd6f) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23572/) + +Change-Id: I987d73bcf47ed1bb27878df40c39751296e95fe8 +BUG: 1787331 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202307 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/posix/bug-1651445.t | 1 + + xlators/storage/posix/src/posix-entry-ops.c | 1 - + xlators/storage/posix/src/posix-inode-fd-ops.c | 141 ++++++++++++++++++++++--- + 3 files changed, 126 insertions(+), 17 deletions(-) + +diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t +index 5248d47..4d08b69 100644 +--- a/tests/bugs/posix/bug-1651445.t ++++ b/tests/bugs/posix/bug-1651445.t +@@ -33,6 +33,7 @@ sleep 5 + # setup_lvm create lvm partition of 150M and 40M are reserve so after + # consuming more than 110M next dd should fail + TEST ! 
dd if=/dev/zero of=$M0/c bs=5M count=1 ++TEST dd if=/dev/urandom of=$M0/a bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc + + rm -rf $M0/* + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index 283b305..bea0bbf 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -1634,7 +1634,6 @@ posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); +- DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL); +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index a2a518f..bcce06e 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -692,6 +692,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + struct posix_private *priv = NULL; ++ gf_boolean_t check_space_error = _gf_false; ++ struct stat statbuf = { ++ 0, ++ }; + + DECLARE_OLD_FS_ID_VAR; + +@@ -711,7 +715,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + if (priv->disk_reserve) + posix_disk_space_check(this); + +- DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); ++ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); ++ ++overwrite: ++ check_space_error = _gf_true; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { +@@ -735,7 +742,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); +- goto out; ++ goto unlock; + } + + if (xdata) { +@@ -745,7 +752,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + ret = -EIO; +- goto out; ++ goto unlock; + } + } + +@@ -756,7 +763,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + "fallocate failed on %s offset: %jd, " + "len:%zu, flags: %d", + uuid_utoa(fd->inode->gfid), offset, len, flags); +- goto out; ++ goto unlock; + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); +@@ -764,16 +771,47 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); +- goto out; ++ goto unlock; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); + +-out: ++unlock: + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } ++ ++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { ++#ifdef FALLOC_FL_KEEP_SIZE ++ if (flags & FALLOC_FL_KEEP_SIZE) { ++ goto overwrite; ++ } ++#endif ++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, ++ "pfd is NULL from fd=%p", fd); ++ goto out; ++ } ++ ++ if (sys_fstat(pfd->fd, &statbuf) < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, ++ "%d", pfd->fd); ++ goto out; ++ } ++ ++ if (offset + len <= statbuf.st_size) { ++ 
gf_msg_debug(this->name, 0, ++ "io vector size will not" ++ " change disk size so allow overwrite for" ++ " fd %d", ++ pfd->fd); ++ goto overwrite; ++ } ++ } ++ ++out: + SET_TO_OLD_FS_ID(); + if (ret == ENOSPC) + ret = -ENOSPC; +@@ -1083,25 +1121,57 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int op_ret = -1; + int op_errno = EINVAL; + dict_t *rsp_xdata = NULL; ++ gf_boolean_t check_space_error = _gf_false; ++ struct posix_fd *pfd = NULL; ++ struct stat statbuf = { ++ 0, ++ }; + +- VALIDATE_OR_GOTO(frame, out); +- VALIDATE_OR_GOTO(this, out); ++ VALIDATE_OR_GOTO(frame, unwind); ++ VALIDATE_OR_GOTO(this, unwind); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + ++overwrite: ++ check_space_error = _gf_true; + ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, + xdata, &rsp_xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; +- goto out; ++ goto unwind; + } + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + + out: ++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { ++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, ++ "pfd is NULL from fd=%p", fd); ++ goto out; ++ } ++ ++ if (sys_fstat(pfd->fd, &statbuf) < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, ++ "%d", pfd->fd); ++ goto out; ++ } ++ ++ if (offset + len <= statbuf.st_size) { ++ gf_msg_debug(this->name, 0, ++ "io vector size will not" ++ " change disk size so allow overwrite for" ++ " fd %d", ++ pfd->fd); ++ goto overwrite; ++ } ++ } ++ ++unwind: + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, + rsp_xdata); + return 0; +@@ -1857,19 +1927,28 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_boolean_t write_append = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; ++ gf_boolean_t check_space_error = _gf_false; ++ struct stat statbuf = { ++ 0, ++ }; ++ int totlen = 0; ++ int idx = 0; + +- VALIDATE_OR_GOTO(frame, out); +- VALIDATE_OR_GOTO(this, out); +- VALIDATE_OR_GOTO(fd, out); +- VALIDATE_OR_GOTO(fd->inode, out); +- VALIDATE_OR_GOTO(vector, out); +- VALIDATE_OR_GOTO(this->private, out); ++ VALIDATE_OR_GOTO(frame, unwind); ++ VALIDATE_OR_GOTO(this, unwind); ++ VALIDATE_OR_GOTO(fd, unwind); ++ VALIDATE_OR_GOTO(fd->inode, unwind); ++ VALIDATE_OR_GOTO(vector, unwind); ++ VALIDATE_OR_GOTO(this->private, unwind); + + priv = this->private; + +- VALIDATE_OR_GOTO(priv, out); ++ VALIDATE_OR_GOTO(priv, unwind); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + ++overwrite: ++ ++ check_space_error = _gf_true; + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "writev received on a block/char file (%s)", +@@ -2011,6 +2090,36 @@ out: + locked = _gf_false; + } + ++ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { ++ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, ++ "pfd is NULL from fd=%p", fd); ++ goto unwind; ++ } ++ ++ if (sys_fstat(pfd->fd, &statbuf) < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, ++ "%d", pfd->fd); ++ goto unwind; ++ } ++ ++ for (idx = 0; idx < count; idx++) { ++ totlen = vector[idx].iov_len; ++ } ++ ++ if ((offset + 
totlen <= statbuf.st_size) && ++ !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) { ++ gf_msg_debug(this->name, 0, ++ "io vector size will not" ++ " change disk size so allow overwrite for" ++ " fd %d", ++ pfd->fd); ++ goto overwrite; ++ } ++ } ++ ++unwind: + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0390-glusterd-deafult-options-after-volume-reset.patch b/SOURCES/0390-glusterd-deafult-options-after-volume-reset.patch new file mode 100644 index 0000000..d95ce71 --- /dev/null +++ b/SOURCES/0390-glusterd-deafult-options-after-volume-reset.patch @@ -0,0 +1,93 @@ +From 86df0ced1cac0e3c48f6149bb2f5442f8548f89e Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Wed, 25 Dec 2019 21:56:32 +0530 +Subject: [PATCH 390/449] glusterd: deafult options after volume reset + +Problem: default option itransport.address-family is disappered +in volume info output after a volume reset. + +Cause: with 3.8.0 onwards volume option transport.address-family +has default value, any volume which is created will have this +option set. So, volume info will show this in its output. But, +with reset volume, this option is not handled. + +Solution: In glusterd_enable_default_options(), we should add this +option along with other default options. This function is called +by glusterd_options_reset() with volume reset command. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23921/ +> fixes: bz#1786478 +> Change-Id: I58f7aa24cf01f308c4efe6cae748cc3bc8b99b1d +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1781710 +Change-Id: I58f7aa24cf01f308c4efe6cae748cc3bc8b99b1d +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202258 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/optimized-basic-testcases.t | 5 +++++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 23 +++++++++++++++++++++++ + 2 files changed, 28 insertions(+) + +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index d700b5e..c7e8c32 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -69,6 +69,11 @@ TEST pidof glusterd; + TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; + EXPECT 'Created' volinfo_field $V0 'Status'; + ++#bug-1786478 - default volume option after volume reset ++addr_family=`volinfo_field $V0 'transport.address-family'` ++TEST $CLI volume reset $V0 ++EXPECT $addr_family volinfo_field $V0 'transport.address-family' ++ + #bug-955588 - uuid validation + + uuid=`grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=` +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index c92cdf3..6654741 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13032,6 +13032,11 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; ++#ifdef IPV6_DEFAULT ++ char *addr_family = "inet6"; ++#else ++ char *addr_family = "inet"; ++#endif + + this = THIS; + GF_ASSERT(this); +@@ -13109,6 +13114,24 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + } + } ++ ++ if (conf->op_version >= GD_OP_VERSION_3_9_0) { ++ if (!option 
|| !strcmp("transport.address-family", option)) { ++ if (volinfo->transport_type == GF_TRANSPORT_TCP) { ++ ret = dict_set_dynstr_with_alloc( ++ volinfo->dict, "transport.address-family", addr_family); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, ++ GD_MSG_DICT_SET_FAILED, ++ "failed to set transport." ++ "address-family on %s", ++ volinfo->volname); ++ goto out; ++ } ++ } ++ } ++ } ++ + if (conf->op_version >= GD_OP_VERSION_7_0) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "storage.fips-mode-rchecksum", "on"); +-- +1.8.3.1 + diff --git a/SOURCES/0391-glusterd-unlink-the-file-after-killing-the-process.patch b/SOURCES/0391-glusterd-unlink-the-file-after-killing-the-process.patch new file mode 100644 index 0000000..2a88254 --- /dev/null +++ b/SOURCES/0391-glusterd-unlink-the-file-after-killing-the-process.patch @@ -0,0 +1,39 @@ +From d23859d5cbd5823b2587811aa57030436ce9e74c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 17 Dec 2019 15:52:30 +0530 +Subject: [PATCH 391/449] glusterd: unlink the file after killing the process + +In glusterd_proc_stop(), after killing the pid +we should remove the pidfile. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23890/ +> fixes: bz#1784375 +> Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1784211 +Change-Id: Ib6367aed590932c884b0f6f892fc40542aa19686 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202257 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +index f55a5fd..a05c90d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c +@@ -107,6 +107,8 @@ glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags) + "service, reason:%s", + proc->name, strerror(errno)); + } ++ } else { ++ (void)glusterd_unlink_file(proc->pidfile); + } + if (flags != PROC_STOP_FORCE) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch b/SOURCES/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch new file mode 100644 index 0000000..e295e4f --- /dev/null +++ b/SOURCES/0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch @@ -0,0 +1,187 @@ +From a30a5fdef2e252eba9f44a3c671de8f3aa4f17d7 Mon Sep 17 00:00:00 2001 +From: Vishal Pandey <vpandey@redhat.com> +Date: Tue, 19 Nov 2019 11:39:22 +0530 +Subject: [PATCH 392/449] glusterd: Brick process fails to come up with + brickmux on + +Issue: +1- In a cluster of 3 Nodes N1, N2, N3. Create 3 volumes vol1, +vol2, vol3 with 3 bricks (one from each node) +2- Set cluster.brick-multiplex on +3- Start all 3 volumes +4- Check if all bricks on a node are running on same port +5- Kill N1 +6- Set performance.readdir-ahead for volumes vol1, vol2, vol3 +7- Bring N1 up and check volume status +8- All bricks processes not running on N1. + +Root Cause - +Since, There is a diff in volfile versions in N1 as compared +to N2 and N3 therefore glusterd_import_friend_volume() is called. +glusterd_import_friend_volume() copies the new_volinfo and deletes +old_volinfo and then calls glusterd_start_bricks(). 
+glusterd_start_bricks() looks for the volfiles and sends an rpc +request to glusterfs_handle_attach(). Now, since the volinfo +has been deleted by glusterd_delete_stale_volume() +from priv->volumes list before glusterd_start_bricks() and +glusterd_create_volfiles_and_notify_services() and +glusterd_list_add_order is called after glusterd_start_bricks(), +therefore the attach RPC req gets an empty volfile path +and that causes the brick to crash. + +Fix- Call glusterd_list_add_order() and +glusterd_create_volfiles_and_notify_services before +glusterd_start_bricks() cal is made in glusterd_import_friend_volume + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23724/ +> Change-Id: Idfe0e8710f7eb77ca3ddfa1cabeb45b2987f41aa +> Fixes: bz#1773856 +> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> + +BUG: 1683602 +Change-Id: Idfe0e8710f7eb77ca3ddfa1cabeb45b2987f41aa +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202255 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../glusterd/brick-mux-validation-in-cluster.t | 61 +++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-utils.c | 28 +++++----- + 2 files changed, 75 insertions(+), 14 deletions(-) + +diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +index 4e57038..f088dbb 100644 +--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t ++++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +@@ -7,6 +7,20 @@ function count_brick_processes { + pgrep glusterfsd | wc -l + } + ++function count_brick_pids { ++ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ ++ | grep -v "N/A" | sort | uniq | wc -l ++} ++ ++function count_N/A_brick_pids { ++ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \ ++ | grep -- '\-1' | sort | uniq | wc -l ++} ++ ++function check_peers { ++ $CLI_2 peer status | grep 'Peer in Cluster (Connected)' | wc -l ++} ++ + cleanup; + + TEST launch_cluster 3 +@@ -48,4 +62,49 @@ TEST $CLI_1 volume stop $V1 + + EXPECT 3 count_brick_processes + +-cleanup ++TEST $CLI_1 volume stop $META_VOL ++ ++TEST $CLI_1 volume delete $META_VOL ++TEST $CLI_1 volume delete $V0 ++TEST $CLI_1 volume delete $V1 ++ ++#bug-1773856 - Brick process fails to come up with brickmux on ++ ++TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 $H3:$B3/${V0}1 force ++TEST $CLI_1 volume start $V0 ++ ++ ++EXPECT 3 count_brick_processes ++ ++#create and start a new volume ++TEST $CLI_1 volume create $V1 $H1:$B1/${V1}2 $H2:$B2/${V1}2 $H3:$B3/${V1}2 force ++TEST $CLI_1 volume start $V1 ++ ++EXPECT 3 count_brick_processes ++ ++V2=patchy2 ++TEST $CLI_1 volume create $V2 $H1:$B1/${V2}3 $H2:$B2/${V2}3 $H3:$B3/${V2}3 force ++TEST $CLI_1 volume start $V2 ++ ++EXPECT 3 count_brick_processes ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids ++ ++TEST kill_node 1 ++ ++sleep 10 ++ ++EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers; ++ ++$CLI_2 volume set $V0 performance.readdir-ahead on ++$CLI_2 volume set $V1 performance.readdir-ahead on ++ ++TEST $glusterd_1; ++ ++sleep 10 ++ ++EXPECT 4 count_brick_processes ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids ++ ++cleanup; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c 
+index 6654741..1b78812 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -4988,16 +4988,6 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + glusterd_volinfo_unref(old_volinfo); + } + +- if (glusterd_is_volume_started(new_volinfo)) { +- (void)glusterd_start_bricks(new_volinfo); +- if (glusterd_is_snapd_enabled(new_volinfo)) { +- svc = &(new_volinfo->snapd.svc); +- if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { +- gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); +- } +- } +- } +- + ret = glusterd_store_volinfo(new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL, +@@ -5007,19 +4997,31 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + goto out; + } + +- ret = glusterd_create_volfiles_and_notify_services(new_volinfo); ++ ret = glusterd_create_volfiles(new_volinfo); + if (ret) + goto out; + ++ glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes, ++ glusterd_compare_volume_name); ++ ++ if (glusterd_is_volume_started(new_volinfo)) { ++ (void)glusterd_start_bricks(new_volinfo); ++ if (glusterd_is_snapd_enabled(new_volinfo)) { ++ svc = &(new_volinfo->snapd.svc); ++ if (svc->manager(svc, new_volinfo, PROC_START_NO_WAIT)) { ++ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name); ++ } ++ } ++ } ++ + ret = glusterd_import_quota_conf(peer_data, count, new_volinfo, "volume"); + if (ret) { + gf_event(EVENT_IMPORT_QUOTA_CONF_FAILED, "volume=%s", + new_volinfo->volname); + goto out; + } +- glusterd_list_add_order(&new_volinfo->vol_list, &priv->volumes, +- glusterd_compare_volume_name); + ++ ret = glusterd_fetchspec_notify(this); + out: + gf_msg_debug("glusterd", 0, "Returning with ret: %d", ret); + return ret; +-- +1.8.3.1 + diff --git a/SOURCES/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch b/SOURCES/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch new file mode 100644 index 0000000..bb93180 --- /dev/null +++ b/SOURCES/0393-afr-restore-timestamp-of-files-during-metadata-heal.patch @@ -0,0 +1,129 @@ +From b528c21e6fedc9ac841942828b82e0c808da5efb Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Thu, 2 Jan 2020 12:05:12 +0530 +Subject: [PATCH 393/449] afr: restore timestamp of files during metadata heal + +For files: During metadata heal, we restore timestamps +only for non-regular (char, block etc.) files. 
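Concretely, "restoring timestamps" means copying atime/mtime from the chosen healthy source brick onto the healed sinks. A rough sketch of that operation in terms of plain syscalls — illustrative only, not the actual afr_selfheal_restore_time() implementation:

#include <fcntl.h>
#include <sys/stat.h>

/* Sketch: stamp a healed copy with the source copy's times.
 * ctime cannot be set from userspace; gluster's ctime feature
 * tracks it separately instead. */
static int
restore_times(const char *sink_path, const struct stat *source_st)
{
    struct timespec times[2] = {
        source_st->st_atim, /* [0] = atime */
        source_st->st_mtim, /* [1] = mtime */
    };

    return utimensat(AT_FDCWD, sink_path, times, AT_SYMLINK_NOFOLLOW);
}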
+Extending it for regular files as timestamp is updated
+via touch command also
+
+> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23953/
+> fixes: bz#1787274
+> Change-Id: I26fe4fb6dff679422ba4698a7f828bf62ca7ca18
+> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+
+BUG: 1761531
+Change-Id: I26fe4fb6dff679422ba4698a7f828bf62ca7ca18
+Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/202332
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ .../bug-1761531-metadata-heal-restore-time.t       | 74 ++++++++++++++++++++++
+ xlators/cluster/afr/src/afr-self-heal-metadata.c   |  8 +--
+ 2 files changed, 76 insertions(+), 6 deletions(-)
+ create mode 100644 tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
+
+diff --git a/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
+new file mode 100644
+index 0000000..7e24eae
+--- /dev/null
++++ b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
+@@ -0,0 +1,74 @@
++#!/bin/bash
++
++. $(dirname $0)/../../include.rc
++. $(dirname $0)/../../volume.rc
++. $(dirname $0)/../../afr.rc
++cleanup
++
++GET_MDATA_PATH=$(dirname $0)/../../utils
++build_tester $GET_MDATA_PATH/get-mdata-xattr.c
++
++TEST glusterd
++TEST pidof glusterd
++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0..2}
++TEST $CLI volume start $V0
++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
++
++TEST touch $M0/a
++sleep 1
++TEST kill_brick $V0 $H0 $B0/brick0
++TEST touch $M0/a
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
++
++TEST $CLI volume start $V0 force
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
++
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
++
++mtime0=$(get_mtime $B0/brick0/a)
++mtime1=$(get_mtime $B0/brick1/a)
++TEST [ $mtime0 -eq $mtime1 ]
++
++ctime0=$(get_ctime $B0/brick0/a)
++ctime1=$(get_ctime $B0/brick1/a)
++TEST [ $ctime0 -eq $ctime1 ]
++
++###############################################################################
++# Repeat the test with ctime feature disabled.
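(The repeat run is meaningful because, with features.ctime off, only mtime is compared across the bricks: ctime cannot be set from userspace, so it cannot be restored once the feature is disabled.)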
++TEST $CLI volume set $V0 features.ctime off ++ ++TEST touch $M0/b ++sleep 1 ++TEST kill_brick $V0 $H0 $B0/brick0 ++TEST touch $M0/b ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++ ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++mtime2=$(get_mtime $B0/brick0/b) ++mtime3=$(get_mtime $B0/brick1/b) ++TEST [ $mtime2 -eq $mtime3 ] ++ ++TEST rm $GET_MDATA_PATH/get-mdata-xattr ++ ++TEST force_umount $M0 ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index ecfa791..f4e31b6 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -421,12 +421,8 @@ afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode) + if (ret) + goto unlock; + +- /* Restore atime/mtime for files that don't need data heal as +- * restoring timestamps happens only as a part of data-heal. +- */ +- if (!IA_ISREG(locked_replies[source].poststat.ia_type)) +- afr_selfheal_restore_time(frame, this, inode, source, healed_sinks, +- locked_replies); ++ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks, ++ locked_replies); + + ret = afr_selfheal_undo_pending( + frame, this, inode, sources, sinks, healed_sinks, undid_pending, +-- +1.8.3.1 + diff --git a/SOURCES/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch b/SOURCES/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch new file mode 100644 index 0000000..96a8f74 --- /dev/null +++ b/SOURCES/0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch @@ -0,0 +1,38 @@ +From 768a6d9bca86c0a50128b8776c11ef2b6d36388d Mon Sep 17 00:00:00 2001 +From: Vishal Pandey <vpandey@redhat.com> +Date: Thu, 21 Nov 2019 12:56:34 +0530 +Subject: [PATCH 394/449] man/gluster: Add volume top command to gluster man + page + +> Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23735/ +> Change-Id: Ib74607d2b2e5a1a0316221f1176a7dcccea632d4 +> Fixes: bz#1774866 +> Signed-off-by: Vishal Pandey <vpandey@redhat.com> + +BUG: 1754391 +Change-Id: Ib74607d2b2e5a1a0316221f1176a7dcccea632d4 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202333 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + doc/gluster.8 | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/doc/gluster.8 b/doc/gluster.8 +index da6472d..88cbf44 100644 +--- a/doc/gluster.8 ++++ b/doc/gluster.8 +@@ -113,6 +113,9 @@ Rotate the log file for corresponding volume/brick. + \fB\ volume profile <VOLNAME> {start|info [peek|incremental [peek]|cumulative|clear]|stop} [nfs] \fR + Profile operations on the volume. Once started, volume profile <volname> info provides cumulative statistics of the FOPs performed. 
+ .TP ++\fB\ volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>] \fR ++Generates a profile of a volume representing the performance and bottlenecks/hotspots of each brick. ++.TP + \fB\ volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|priv|fd|inode|history]... | [client <hostname:process-id>]] \fR + Dumps the in memory state of the specified process or the bricks of the volume. + .TP +-- +1.8.3.1 + diff --git a/SOURCES/0395-Cli-Removing-old-log-rotate-command.patch b/SOURCES/0395-Cli-Removing-old-log-rotate-command.patch new file mode 100644 index 0000000..0918777 --- /dev/null +++ b/SOURCES/0395-Cli-Removing-old-log-rotate-command.patch @@ -0,0 +1,111 @@ +From 5b3fcc8db86b4dc7af1eb63315ca2ff41c60fdea Mon Sep 17 00:00:00 2001 +From: kshithijiyer <kshithij.ki@gmail.com> +Date: Sat, 30 Nov 2019 15:25:11 +0530 +Subject: [PATCH 395/449] [Cli] Removing old log rotate command. + +The old command for log rotate is still present removing +it completely. Also adding testcase to test the +log rotate command with both the old as well as the new command +and fixing testcase which use the old syntax to use the new +one. + +Code to be removed: +1. In cli-cmd-volume.c from struct cli_cmd volume_cmds[]: +{"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick" + " NOTE: This is an old syntax, will be deprecated from next release."}, + +2. In cli-cmd-volume.c from cli_cmd_log_rotate_cbk(): + ||(strcmp("rotate", words[2]) == 0))) + +3. In cli-cmd-parser.c from cli_cmd_log_rotate_parse() +if (strcmp("rotate", words[2]) == 0) + volname = (char *)words[3]; +else + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23392/ +> fixes: bz#1750387 +> Change-Id: I56e4d295044e8d5fd1fc0d848bc87e135e9e32b4 +> Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> + +BUG: 1784415 +Change-Id: I56e4d295044e8d5fd1fc0d848bc87e135e9e32b4 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202334 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 2 -- + cli/src/cli-cmd-volume.c | 7 +------ + tests/bugs/glusterd/optimized-basic-testcases.t | 3 ++- + tests/bugs/glusterfs-server/bug-852147.t | 2 +- + 4 files changed, 4 insertions(+), 10 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 4456a7b..ac0a263 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2592,8 +2592,6 @@ cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options) + + if (strcmp("rotate", words[3]) == 0) + volname = (char *)words[2]; +- else if (strcmp("rotate", words[2]) == 0) +- volname = (char *)words[3]; + GF_ASSERT(volname); + + ret = dict_set_str(dict, "volname", volname); +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 754d333..f33fc99 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2349,8 +2349,7 @@ cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word, + goto out; + } + +- if (!((strcmp("rotate", words[2]) == 0) || +- (strcmp("rotate", words[3]) == 0))) { ++ if (!(strcmp("rotate", words[3]) == 0)) { + cli_usage_out(word->pattern); + parse_error = 1; + goto out; +@@ -3401,10 +3400,6 @@ struct cli_cmd volume_cmds[] = { + {"volume 
log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk, + "rotate the log file for corresponding volume/brick"}, + +- {"volume log rotate <VOLNAME> [BRICK]", cli_cmd_log_rotate_cbk, +- "rotate the log file for corresponding volume/brick" +- " NOTE: This is an old syntax, will be deprecated from next release."}, +- + {"volume sync <HOSTNAME> [all|<VOLNAME>]", cli_cmd_sync_volume_cbk, + "sync the volume information from a peer"}, + +diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t +index c7e8c32..862f329 100644 +--- a/tests/bugs/glusterd/optimized-basic-testcases.t ++++ b/tests/bugs/glusterd/optimized-basic-testcases.t +@@ -129,7 +129,8 @@ TEST ! $CLI volume set all $V0 cluster.op-version $OP_VERS_NEW + + #bug-1022055 - validate log rotate command + +-TEST $CLI volume log rotate $V0; ++TEST ! $CLI volume log rotate $V0; ++TEST $CLI volume log $V0 rotate; + + #bug-1092841 - validating barrier enable/disable + +diff --git a/tests/bugs/glusterfs-server/bug-852147.t b/tests/bugs/glusterfs-server/bug-852147.t +index c644cfa..75db2a2 100755 +--- a/tests/bugs/glusterfs-server/bug-852147.t ++++ b/tests/bugs/glusterfs-server/bug-852147.t +@@ -66,7 +66,7 @@ ren_file=$log_file".*" + rm -rf $ren_file + + #Initiating log rotate +-TEST $CLI volume log rotate $V0 ++TEST $CLI volume log $V0 rotate + + #Capturing new log file's size + new_file_size=`file-size $log_file` +-- +1.8.3.1 + diff --git a/SOURCES/0396-Updating-gluster-manual.patch b/SOURCES/0396-Updating-gluster-manual.patch new file mode 100644 index 0000000..bb33d10 --- /dev/null +++ b/SOURCES/0396-Updating-gluster-manual.patch @@ -0,0 +1,56 @@ +From 728aab1c1cfcf352d4ca1fde0b80044dc24bd9fa Mon Sep 17 00:00:00 2001 +From: Rishubh Jain <risjain@redhat.com> +Date: Sun, 18 Aug 2019 18:02:57 +0530 +Subject: [PATCH 396/449] Updating gluster manual. + +Adding disperse-data to gluster manual under +volume create command + +> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23258/ +> Change-Id: Ic9eb47c9e71a1d7a11af9394c615c8e90f8d1d69 +> Fixes: bz#1668239 +> Signed-off-by: Rishubh Jain <risjain@redhat.com> +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1667954 +Change-Id: Ic9eb47c9e71a1d7a11af9394c615c8e90f8d1d69 +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202342 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + doc/gluster.8 | 2 +- + tests/basic/glusterd/disperse-create.t | 4 ++++ + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/doc/gluster.8 b/doc/gluster.8 +index 88cbf44..66bdb48 100644 +--- a/doc/gluster.8 ++++ b/doc/gluster.8 +@@ -41,7 +41,7 @@ List all volumes in cluster + \fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad|tierd]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR + Display status of all or specified volume(s)/brick + .TP +-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR ++\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR + Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp). 
+ To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option. + .TP +diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t +index 384c675..db8a621 100644 +--- a/tests/basic/glusterd/disperse-create.t ++++ b/tests/basic/glusterd/disperse-create.t +@@ -20,6 +20,10 @@ TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/b7 $H0:$B0/b8 $H0:$B + EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" + + TEST $CLI volume delete $V0 ++TEST $CLI volume create $V0 disperse-data 2 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12 ++EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" ++ ++TEST $CLI volume delete $V0 + TEST $CLI volume create $V0 redundancy 1 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12 + EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" + +-- +1.8.3.1 + diff --git a/SOURCES/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch b/SOURCES/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch new file mode 100644 index 0000000..6694813 --- /dev/null +++ b/SOURCES/0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch @@ -0,0 +1,52 @@ +From 73cef29731c0d7b8b4f3b880c032dc232b8fcc31 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Thu, 4 Jun 2020 16:06:44 +0530 +Subject: [PATCH 397/449] mgmt/brick-mux: Avoid sending two response when + attach is failed. + +We were sending two response back to glusterd when an attach is +failed. One from the handler function glusterfs_handle_attach and +another from rpcsvc_check_and_reply_error. It was causing problems +like ref leaks, transport disconnect etc. + +> Change-Id: I3bb5b59959530760b568d52becb519499b3dcd2b +> updates: bz#1785143 +> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +> (Cherry pick from commit 42f484dcecd9942611396d9bd2ad3a39019b0e1f) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23906/) + +Change-Id: I3bb5b59959530760b568d52becb519499b3dcd2b +BUG: 1776901 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202346 +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/glusterfsd-mgmt.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c +index 15acc10..61d1b21 100644 +--- a/glusterfsd/src/glusterfsd-mgmt.c ++++ b/glusterfsd/src/glusterfsd-mgmt.c +@@ -954,7 +954,15 @@ glusterfs_handle_attach(rpcsvc_request_t *req) + ret = -1; + } + +- glusterfs_translator_info_response_send(req, ret, NULL, NULL); ++ ret = glusterfs_translator_info_response_send(req, ret, NULL, NULL); ++ if (ret) { ++ /* Response sent back to glusterd, req is already destroyed. So ++ * resetting the ret to 0. Otherwise another response will be ++ * send from rpcsvc_check_and_reply_error. Which will lead to ++ * double resource leak. 
++ */ ++ ret = 0; ++ } + + out: + UNLOCK(&ctx->volfile_lock); +-- +1.8.3.1 + diff --git a/SOURCES/0398-ec-change-error-message-for-heal-commands-for-disper.patch b/SOURCES/0398-ec-change-error-message-for-heal-commands-for-disper.patch new file mode 100644 index 0000000..5779539 --- /dev/null +++ b/SOURCES/0398-ec-change-error-message-for-heal-commands-for-disper.patch @@ -0,0 +1,75 @@ +From 03d2c7b52da5efd6ad660315a0548c8b91e51439 Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Sun, 22 Dec 2019 22:52:30 +0530 +Subject: [PATCH 398/449] ec: change error message for heal commands for + disperse volume + +Currently when we issue a heal statistics or similar commands +for disperse volume, it fails with message "Volume is not of +type replicate." Adding message "this command is supported for +volumes of type replicate" to reflect supportability and better +understanding of heal functionality for disperse volumes. + +> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23916/ +> fixes: bz#1785998 +> Change-Id: I9688a9fdf427cb6f657cfd5b8db2f76a6c56f6e2 +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1487177 +Change-Id: I9688a9fdf427cb6f657cfd5b8db2f76a6c56f6e2 +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202344 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + heal/src/glfs-heal.c | 15 ++++++++++----- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 4 +++- + 2 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 7e37e47..125b12c 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -1726,14 +1726,19 @@ main(int argc, char **argv) + goto out; + } + ++ char *var_str = (heal_op == GF_SHD_OP_INDEX_SUMMARY || ++ heal_op == GF_SHD_OP_HEAL_SUMMARY) ++ ? "replicate/disperse" ++ : "replicate"; ++ + ret = glfsh_validate_volume(top_subvol, heal_op); + if (ret < 0) { + ret = -EINVAL; +- gf_asprintf(&op_errstr, "Volume %s is not of type %s", volname, +- (heal_op == GF_SHD_OP_INDEX_SUMMARY || +- heal_op == GF_SHD_OP_HEAL_SUMMARY) +- ? "replicate/disperse" +- : "replicate"); ++ gf_asprintf(&op_errstr, ++ "This command is supported " ++ "for only volumes of %s type. Volume %s " ++ "is not of type %s", ++ var_str, volname, var_str); + goto out; + } + rootloc.inode = inode_ref(top_subvol->itable->root); +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 076bc80..93042ab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -2008,7 +2008,9 @@ glusterd_handle_heal_cmd(xlator_t *this, glusterd_volinfo_t *volinfo, + if (!glusterd_is_volume_replicate(volinfo)) { + ret = -1; + snprintf(msg, sizeof(msg), +- "Volume %s is not of type " ++ "This command is supported " ++ "for only volume of replicated " ++ "type. 
Volume %s is not of type " + "replicate", + volinfo->volname); + *op_errstr = gf_strdup(msg); +-- +1.8.3.1 + diff --git a/SOURCES/0399-glusterd-coverity-fixes.patch b/SOURCES/0399-glusterd-coverity-fixes.patch new file mode 100644 index 0000000..8052a46 --- /dev/null +++ b/SOURCES/0399-glusterd-coverity-fixes.patch @@ -0,0 +1,79 @@ +From 1ebd2a3227469b1775f19c8f78af7d3d19f749a3 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Fri, 26 Apr 2019 08:47:12 +0530 +Subject: [PATCH 399/449] glusterd: coverity fixes + +1400775 - USE_AFTER_FREE +1400742 - Missing Unlock +1400736 - CHECKED_RETURN +1398470 - Missing Unlock + +Missing unlock is the tricky one, we have had annotation added, but +coverity still continued to complaint. Added pthread_mutex_unlock to +clean up the lock before destroying it to see if it makes coverity +happy. + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22634/ +> Updates: bz#789278 +> Change-Id: I1d892612a17f805144d96c1b15004a85a1639414 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1787310 +Change-Id: I1d892612a17f805144d96c1b15004a85a1639414 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202343 +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 1 + + xlators/mgmt/glusterd/src/glusterd-sm.c | 1 - + xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +++++++- + 3 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index f24c86e..8c1feeb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -48,6 +48,7 @@ glusterd_peerinfo_destroy(struct rcu_head *head) + } + + glusterd_sm_tr_log_delete(&peerinfo->sm_log); ++ pthread_mutex_unlock(&peerinfo->delete_lock); + pthread_mutex_destroy(&peerinfo->delete_lock); + GF_FREE(peerinfo); + +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 54a7bd1..044da3d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -868,7 +868,6 @@ glusterd_ac_friend_remove(glusterd_friend_sm_event_t *event, void *ctx) + "Cleanup returned: %d", ret); + } + out: +- /* coverity[ LOCK] */ + return 0; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 1b78812..a1299bc 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -5840,7 +5840,13 @@ attach_brick_callback(struct rpc_req *req, struct iovec *iov, int count, + /* PID file is copied once brick has attached + successfully + */ +- glusterd_copy_file(pidfile1, pidfile2); ++ ret = glusterd_copy_file(pidfile1, pidfile2); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Could not copy file %s to %s", pidfile1, pidfile2); ++ goto out; ++ } ++ + brickinfo->status = GF_BRICK_STARTED; + brickinfo->rpc = rpc_clnt_ref(other_brick->rpc); + gf_log(THIS->name, GF_LOG_INFO, "brick %s is attached successfully", +-- +1.8.3.1 + diff --git a/SOURCES/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch b/SOURCES/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch new file mode 100644 index 
0000000..dd1ea52 --- /dev/null +++ b/SOURCES/0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch @@ -0,0 +1,98 @@ +From 12ed9226fa24d073ab2b89692194b454a194c379 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Thu, 4 Jun 2020 15:14:29 +0530 +Subject: [PATCH 400/449] cli: throw a warning if replica count greater than 3 + +As volumes with replica count greater than 3 are not +supported, a warning message is be thrown to user +while creating the volume with replica count greater +than 3 or while converting a volume to replica > 3 +volume by add-brick/remove-brick operations. + +Label: DOWNSTREAM ONLY + +BUG: 1763129 +Change-Id: I5a32a5a2d99b5175fb692dfcab27396089f24b72 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202338 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index ac0a263..5e7ce53 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -619,6 +619,23 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words, + } + } + } ++ ++ if (replica_count > 3) { ++ if (strcmp(words[wordcount - 1], "force")) { ++ question = ++ "Volumes with replica count greater than 3 are" ++ "not supported. \nDo you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Volume create " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } ++ } ++ + ret = dict_set_int32(dict, "replica-count", replica_count); + if (ret) + goto out; +@@ -1815,6 +1832,20 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words, + goto out; + } + } ++ } else if (count > 3) { ++ if (strcmp(words[wordcount - 1], "force")) { ++ question = ++ "Volumes with replica count greater than 3 are" ++ "not supported. \nDo you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation(state, question); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "add-brick " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } + } + } else if ((strcmp(w, "stripe")) == 0) { + cli_err("stripe option not supported"); +@@ -2082,6 +2113,20 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words, + goto out; + } + } ++ } else if (count > 3) { ++ if (strcmp(words[wordcount - 1], "force")) { ++ ques = ++ "Volumes with replica count greater than 3 are" ++ "not supported. 
\nDo you still want to continue?\n"; ++ answer = cli_cmd_get_confirmation(state, ques); ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_ERROR, ++ "Remove-brick " ++ "cancelled, exiting"); ++ ret = -1; ++ goto out; ++ } ++ } + } + + ret = dict_set_int32(dict, "replica-count", count); +-- +1.8.3.1 + diff --git a/SOURCES/0401-cli-change-the-warning-message.patch b/SOURCES/0401-cli-change-the-warning-message.patch new file mode 100644 index 0000000..5c3e895 --- /dev/null +++ b/SOURCES/0401-cli-change-the-warning-message.patch @@ -0,0 +1,70 @@ +From 704bf84d432e1eea1534e35ee27d4116a7273146 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Thu, 4 Jun 2020 16:15:35 +0530 +Subject: [PATCH 401/449] cli: change the warning message + +while creating the replica 2 volume or converting +a volume to replica 2 volume, we issue a warning +saying "replica 2 volumes are prone to split brain". +As the support for replica 2 volumes has been deprecated, +warning message should be changed accordingly to reflect +the same. + +Label: DOWNSTREAM ONLY + +BUG: 1763124 +Change-Id: If55e5412cda2e4a21a6359492d8d704dd702530d +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202348 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 5e7ce53..7446b95 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -603,8 +603,8 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words, + if (replica_count == 2) { + if (strcmp(words[wordcount - 1], "force")) { + question = +- "Replica 2 volumes are prone" +- " to split-brain. Use " ++ "Support for replica 2 volumes stands deprecated as " ++ "they are prone to split-brain. Use " + "Arbiter or Replica 3 to " + "avoid this.\n" + "Do you still want to " +@@ -1817,9 +1817,9 @@ cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words, + if (count == 2) { + if (strcmp(words[wordcount - 1], "force")) { + question = +- "Replica 2 volumes are prone to " +- "split-brain. Use Arbiter or " +- "Replica 3 to avaoid this. See: " ++ "Support for replica 2 volumes stands deprecated as they " ++ "are prone to split-brain. Use Arbiter or " ++ "Replica 3 to avoid this. See: " + "http://docs.gluster.org/en/latest/Administrator%20Guide/" + "Split%20brain%20and%20ways%20to%20deal%20with%20it/." + "\nDo you still want to continue?\n"; +@@ -2098,9 +2098,9 @@ cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words, + if (count == 2) { + if (strcmp(words[wordcount - 1], "force")) { + ques = +- "Replica 2 volumes are prone to " +- "split-brain. Use Arbiter or Replica 3 " +- "to avaoid this. See: " ++ "Support for replica 2 volumes stands deprecated as they " ++ "are prone to split-brain. Use Arbiter or Replica 3 " ++ "to avoid this. See: " + "http://docs.gluster.org/en/latest/Administrator%20Guide/" + "Split%20brain%20and%20ways%20to%20deal%20with%20it/." 
+ "\nDo you still want to continue?\n"; +-- +1.8.3.1 + diff --git a/SOURCES/0402-afr-wake-up-index-healer-threads.patch b/SOURCES/0402-afr-wake-up-index-healer-threads.patch new file mode 100644 index 0000000..34ca329 --- /dev/null +++ b/SOURCES/0402-afr-wake-up-index-healer-threads.patch @@ -0,0 +1,198 @@ +From ecaa0f10820f4b6e803021919ce59a43aedf356b Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 4 Jun 2020 16:15:35 +0530 +Subject: [PATCH 402/449] afr: wake up index healer threads + +...whenever shd is re-enabled after disabling or there is a change in +`cluster.heal-timeout`, without needing to restart shd or waiting for the +current `cluster.heal-timeout` seconds to expire. + +> Upstream patch link:https://review.gluster.org/#/c/glusterfs/+/23288/ +> Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe +> fixes: bz#1744548 +> Reported-by: Glen Kiessling <glenk1973@hotmail.com> +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +BUG: 1764091 +Change-Id: I42aa0807f09b5a09510fe9efb4a1697dad3410a3 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202368 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 +++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 6 ++-- + xlators/cluster/afr/src/afr-self-heald.c | 14 ++++++--- + xlators/cluster/afr/src/afr-self-heald.h | 3 -- + xlators/cluster/afr/src/afr.c | 10 ++++++ + xlators/cluster/afr/src/afr.h | 2 ++ + 6 files changed, 66 insertions(+), 11 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1744548-heal-timeout.t + +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +new file mode 100644 +index 0000000..3cb73bc +--- /dev/null ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -0,0 +1,42 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++TEST ! $CLI volume heal $V0 ++ ++# Enable shd and verify that index crawl is triggered immediately. ++TEST $CLI volume profile $V0 start ++TEST $CLI volume profile $V0 info clear ++TEST $CLI volume heal $V0 enable ++TEST $CLI volume heal $V0 ++# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes ++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` ++TEST [ "$COUNT" == "333" ] ++ ++# Check that a change in heal-timeout is honoured immediately. ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++sleep 10 ++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` ++# Two crawls must have happened. ++TEST [ "$COUNT" == "666" ] ++ ++# shd must not heal if it is disabled and heal-timeout is changed. 
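Behind these checks, the mechanism the patch adds is a plain wake-up: each index-healer thread sleeps on a condition variable for heal-timeout seconds, and reconfigure now signals it instead of letting the old interval run out. A generic sketch of that pattern — simplified, not the exact afr_shd code:

#include <pthread.h>
#include <stdbool.h>

struct healer {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    bool rerun; /* set to force an immediate crawl */
};

/* Sketch: called when shd is re-enabled or heal-timeout changes,
 * so a sleeping crawler wakes up right away instead of waiting
 * out the previously configured interval. */
static void
healer_kick(struct healer *healer)
{
    pthread_mutex_lock(&healer->mutex);
    healer->rerun = true;
    pthread_cond_signal(&healer->cond);
    pthread_mutex_unlock(&healer->mutex);
}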
++TEST $CLI volume heal $V0 disable ++TEST $CLI volume profile $V0 info clear ++TEST $CLI volume set $V0 cluster.heal-timeout 6 ++sleep 6 ++COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` ++TEST [ -z $COUNT ] ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 3690b84..eef7fd2 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5613,10 +5613,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) + * b) Already heard from everyone, but we now got a child-up + * event. + */ +- if (have_heard_from_all && priv->shd.iamshd) { +- for (i = 0; i < priv->child_count; i++) +- if (priv->child_up[i]) +- afr_selfheal_childup(this, i); ++ if (have_heard_from_all) { ++ afr_selfheal_childup(this, priv); + } + } + out: +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 7eb1207..95ac5f2 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -1258,12 +1258,18 @@ out: + return ret; + } + +-int +-afr_selfheal_childup(xlator_t *this, int subvol) ++void ++afr_selfheal_childup(xlator_t *this, afr_private_t *priv) + { +- afr_shd_index_healer_spawn(this, subvol); ++ int subvol = 0; + +- return 0; ++ if (!priv->shd.iamshd) ++ return; ++ for (subvol = 0; subvol < priv->child_count; subvol++) ++ if (priv->child_up[subvol]) ++ afr_shd_index_healer_spawn(this, subvol); ++ ++ return; + } + + int +diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h +index 7de7c43..1990539 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.h ++++ b/xlators/cluster/afr/src/afr-self-heald.h +@@ -60,9 +60,6 @@ typedef struct { + } afr_self_heald_t; + + int +-afr_selfheal_childup(xlator_t *this, int subvol); +- +-int + afr_selfheal_daemon_init(xlator_t *this); + + int +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 33258a0..8f9e71f 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -141,6 +141,7 @@ reconfigure(xlator_t *this, dict_t *options) + afr_private_t *priv = NULL; + xlator_t *read_subvol = NULL; + int read_subvol_index = -1; ++ int timeout_old = 0; + int ret = -1; + int index = -1; + char *qtype = NULL; +@@ -150,6 +151,7 @@ reconfigure(xlator_t *this, dict_t *options) + char *locking_scheme = NULL; + gf_boolean_t consistent_io = _gf_false; + gf_boolean_t choose_local_old = _gf_false; ++ gf_boolean_t enabled_old = _gf_false; + + priv = this->private; + +@@ -255,11 +257,13 @@ reconfigure(xlator_t *this, dict_t *options) + GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options, + bool, out); + ++ enabled_old = priv->shd.enabled; + GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out); + + GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool, + out); + ++ timeout_old = priv->shd.timeout; + GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out); + + GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options, +@@ -283,6 +287,12 @@ reconfigure(xlator_t *this, dict_t *options) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; + ++ if (priv->shd.enabled) { ++ if ((priv->shd.enabled != enabled_old) || ++ (timeout_old != priv->shd.timeout)) ++ afr_selfheal_childup(this, priv); ++ } ++ + ret = 0; + out: + return ret; +diff --git a/xlators/cluster/afr/src/afr.h 
b/xlators/cluster/afr/src/afr.h +index e731cfa..18f1a6a 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -1332,4 +1332,6 @@ afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this, + void + afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); + ++void ++afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + #endif /* __AFR_H__ */ +-- +1.8.3.1 + diff --git a/SOURCES/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch b/SOURCES/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch new file mode 100644 index 0000000..569bdc0 --- /dev/null +++ b/SOURCES/0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch @@ -0,0 +1,84 @@ +From b311385a3c4bd56d69d1fa7e9bd3d9a2ae5c344e Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 7 Oct 2019 12:27:01 +0530 +Subject: [PATCH 403/449] Fix spurious failure in bug-1744548-heal-timeout.t + +Script was assuming that the heal would have triggered +by the time test was executed, which may not be the case. +It can lead to following failures when the race happens: + +... +18:29:45 not ok 14 [ 85/ 1] < 26> '[ 331 == 333 ]' -> '' +... +18:29:45 not ok 16 [ 10097/ 1] < 33> '[ 668 == 666 ]' -> '' + +Heal on 3rd brick didn't start completely first time the command was executed. +So the extra count got added to the next profile info. + +Fixed it by depending on cumulative stats and waiting until the count is +satisfied using EXPECT_WITHIN + +> Upstream patch link:https://review.gluster.org/23523 +>fixes: bz#1759002 +>Change-Id: I3b410671c902d6b1458a757fa245613cb29d967d +>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1764091 +Change-Id: Ic4d16b6c8a1bbc35735567d60fd0383456b9f534 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202369 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1744548-heal-timeout.t | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index 3cb73bc..0aaa3ea 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -4,6 +4,11 @@ + . $(dirname $0)/../../volume.rc + . $(dirname $0)/../../afr.rc + ++function get_cumulative_opendir_count { ++#sed 'n:d' prints odd-numbered lines ++ $CLI volume profile $V0 info |grep OPENDIR|sed 'n;d' | awk '{print $8}'|tr -d '\n' ++} ++ + cleanup; + + TEST glusterd; +@@ -20,23 +25,23 @@ TEST ! $CLI volume heal $V0 + TEST $CLI volume profile $V0 start + TEST $CLI volume profile $V0 info clear + TEST $CLI volume heal $V0 enable +-TEST $CLI volume heal $V0 + # Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +-COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +-TEST [ "$COUNT" == "333" ] ++EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count + + # Check that a change in heal-timeout is honoured immediately. + TEST $CLI volume set $V0 cluster.heal-timeout 5 + sleep 10 +-COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` + # Two crawls must have happened. +-TEST [ "$COUNT" == "666" ] ++EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count + + # shd must not heal if it is disabled and heal-timeout is changed. 
+ TEST $CLI volume heal $V0 disable ++#Wait for configuration update and any opendir fops to complete + TEST $CLI volume profile $V0 info clear + TEST $CLI volume set $V0 cluster.heal-timeout 6 +-sleep 6 ++#Better to wait for more than 6 seconds to account for configuration updates ++sleep 10 + COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` + TEST [ -z $COUNT ] + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0404-tests-Fix-spurious-failure.patch b/SOURCES/0404-tests-Fix-spurious-failure.patch new file mode 100644 index 0000000..9cbb6ea --- /dev/null +++ b/SOURCES/0404-tests-Fix-spurious-failure.patch @@ -0,0 +1,38 @@ +From b65ca1045910bc18c601681788eb322dbb8ec2fa Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 14 Oct 2019 10:29:31 +0530 +Subject: [PATCH 404/449] tests: Fix spurious failure + +> Upstream patch:https://review.gluster.org/23546 +> fixes: bz#1759002 +> Change-Id: I4d49e1c2ca9b3c1d74b9dd5a30f1c66983a76529 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1764091 +Change-Id: I8b66f08cce7a87788867c6373aed71d6fc65155f +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202370 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1744548-heal-timeout.t | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index 0aaa3ea..c208112 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -5,8 +5,8 @@ + . $(dirname $0)/../../afr.rc + + function get_cumulative_opendir_count { +-#sed 'n:d' prints odd-numbered lines +- $CLI volume profile $V0 info |grep OPENDIR|sed 'n;d' | awk '{print $8}'|tr -d '\n' ++#sed command prints content between Cumulative and Interval, this keeps content from Cumulative stats ++ $CLI volume profile $V0 info |sed -n '/^Cumulative/,/^Interval/p'|grep OPENDIR| awk '{print $8}'|tr -d '\n' + } + + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch b/SOURCES/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch new file mode 100644 index 0000000..765c154 --- /dev/null +++ b/SOURCES/0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch @@ -0,0 +1,175 @@ +From 9c5f5b4ffd49e8c8631defb7b6873248bbfdaf9c Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Tue, 23 Jul 2019 13:16:04 +0000 +Subject: [PATCH 405/449] [core] fix return of local in __nlc_inode_ctx_get + +__nlc_inode_ctx_get assigns a value to nlc_pe_p which is never used by +its parent function or any of its predecessors; hence remove the +assignment and also that function argument as it is not being used +anywhere.
+ +> fixes: bz#1732496 +> Change-Id: I5b950e1e251bd50a646616da872a4efe9d2ff8c9 +> (Cherry pick from commit 84a55090123a7e3124100e5564da8c521c3c22ab ) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23093/) + +BUG: 1686897 + +Change-Id: I5b950e1e251bd50a646616da872a4efe9d2ff8c9 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202372 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/nl-cache/src/nl-cache-helper.c | 36 +++++++++------------- + 1 file changed, 14 insertions(+), 22 deletions(-) + +diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c +index 009f33a..4314038 100644 +--- a/xlators/performance/nl-cache/src/nl-cache-helper.c ++++ b/xlators/performance/nl-cache/src/nl-cache-helper.c +@@ -145,12 +145,10 @@ nlc_disable_cache(xlator_t *this) + } + + static int +-__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, +- nlc_pe_t **nlc_pe_p) ++__nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) + { + int ret = 0; + nlc_ctx_t *nlc_ctx = NULL; +- nlc_pe_t *nlc_pe = NULL; + uint64_t nlc_ctx_int = 0; + uint64_t nlc_pe_int = 0; + +@@ -159,10 +157,6 @@ __nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, + nlc_ctx = (void *)(long)(nlc_ctx_int); + *nlc_ctx_p = nlc_ctx; + } +- if (ret == 0 && nlc_pe_p) { +- nlc_pe = (void *)(long)(nlc_pe_int); +- *nlc_pe_p = nlc_pe; +- } + return ret; + } + +@@ -186,14 +180,13 @@ nlc_inode_ctx_set(xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx, + } + + static void +-nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, +- nlc_pe_t **nlc_pe_p) ++nlc_inode_ctx_get(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) + { + int ret = 0; + + LOCK(&inode->lock); + { +- ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p, nlc_pe_p); ++ ret = __nlc_inode_ctx_get(this, inode, nlc_ctx_p); + if (ret < 0) + gf_msg_debug(this->name, 0, + "inode ctx get failed for " +@@ -290,8 +283,7 @@ out: + } + + static nlc_ctx_t * +-nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, +- nlc_pe_t **nlc_pe_p) ++nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p) + { + int ret = 0; + nlc_ctx_t *nlc_ctx = NULL; +@@ -301,7 +293,7 @@ nlc_inode_ctx_get_set(xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p, + + LOCK(&inode->lock); + { +- ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx, nlc_pe_p); ++ ret = __nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (nlc_ctx) + goto unlock; + +@@ -410,7 +402,7 @@ nlc_set_dir_state(xlator_t *this, inode_t *inode, uint64_t state) + goto out; + } + +- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -430,7 +422,7 @@ nlc_cache_timeout_handler(struct gf_tw_timer_list *timer, void *data, + nlc_timer_data_t *tmp = data; + nlc_ctx_t *nlc_ctx = NULL; + +- nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(tmp->this, tmp->inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -696,7 +688,7 @@ nlc_inode_clear_cache(xlator_t *this, inode_t *inode, int reason) + { + nlc_ctx_t *nlc_ctx = NULL; + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -883,7 +875,7 @@ nlc_dir_add_ne(xlator_t *this, inode_t *inode, 
const char *name) + goto out; + } + +- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -914,7 +906,7 @@ nlc_dir_remove_pe(xlator_t *this, inode_t *parent, inode_t *entry_ino, + goto out; + } + +- nlc_inode_ctx_get(this, parent, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, parent, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -945,7 +937,7 @@ nlc_dir_add_pe(xlator_t *this, inode_t *inode, inode_t *entry_ino, + goto out; + } + +- nlc_inode_ctx_get_set(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get_set(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -1051,7 +1043,7 @@ nlc_is_negative_lookup(xlator_t *this, loc_t *loc) + goto out; + } + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -1102,7 +1094,7 @@ nlc_get_real_file_name(xlator_t *this, loc_t *loc, const char *fname, + goto out; + } + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + if (!nlc_ctx) + goto out; + +@@ -1152,7 +1144,7 @@ nlc_dump_inodectx(xlator_t *this, inode_t *inode) + nlc_ne_t *ne = NULL; + nlc_ne_t *tmp1 = NULL; + +- nlc_inode_ctx_get(this, inode, &nlc_ctx, NULL); ++ nlc_inode_ctx_get(this, inode, &nlc_ctx); + + if (!nlc_ctx) + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0406-afr-support-split-brain-CLI-for-replica-3.patch b/SOURCES/0406-afr-support-split-brain-CLI-for-replica-3.patch new file mode 100644 index 0000000..4b57e8a --- /dev/null +++ b/SOURCES/0406-afr-support-split-brain-CLI-for-replica-3.patch @@ -0,0 +1,185 @@ +From a75bb15fbe64f14580c44b8a33314c8bbeffdede Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 4 Jun 2020 18:54:46 +0530 +Subject: [PATCH 406/449] afr: support split-brain CLI for replica 3 + +Patch in upstream master: https://review.gluster.org/#/c/glusterfs/+/23502/ + +Ever since we added quorum checks for lookups in afr via commit +bd44d59741bb8c0f5d7a62c5b1094179dd0ce8a4, the split-brain resolution +commands would not work for replica 3 because there would be no +readables for the lookup fop. + +The argument was that split-brains do not occur in replica 3 but we do +see (data/metadata) split-brain cases once in a while which indicate that there are +a few bugs/corner cases yet to be discovered and fixed. + +Fortunately, commit 8016d51a3bbd410b0b927ed66be50a09574b7982 added +GF_CLIENT_PID_GLFS_HEALD as the pid for all fops made by glfsheal. If we +leverage this and allow lookups in afr when pid is GF_CLIENT_PID_GLFS_HEALD, +split-brain resolution commands will work for replica 3 volumes too. + +Likewise, the check is added in shard_lookup as well to permit resolving +split-brains by specifying "/.shard/shard-file.xx" as the file name +(which previously used to fail with EPERM). 
+ +BUG: 1759875 +Change-Id: I203735b909c7d30fc4faaf3ecd4f5b6b379ab266 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202375 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../replicate/bug-1756938-replica-3-sbrain-cli.t | 111 +++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 3 +- + xlators/features/shard/src/shard.c | 3 +- + 3 files changed, 115 insertions(+), 2 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t + +diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t +new file mode 100644 +index 0000000..c1bdf34 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t +@@ -0,0 +1,111 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 features.shard enable ++TEST $CLI volume set $V0 features.shard-block-size 4MB ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++#Create split-brain by setting afr xattrs/gfids manually. ++#file1 is non-sharded and will be in data split-brain. ++#file2 will have one shard which will be in data split-brain. ++#file3 will have one shard which will be in gfid split-brain. ++#file4 will have one shard which will be in data & metadata split-brain. 
++TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024 oflag=direct ++TEST dd if=/dev/zero of=$M0/file2 bs=1M count=6 oflag=direct ++TEST dd if=/dev/zero of=$M0/file3 bs=1M count=6 oflag=direct ++TEST dd if=/dev/zero of=$M0/file4 bs=1M count=6 oflag=direct ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++#------------------------------------------------------------------------------- ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/file1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/file1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/file1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/file1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/file1 ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/file1 ++ ++#------------------------------------------------------------------------------- ++gfid_f2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file2)) ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1 ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1 ++ ++#------------------------------------------------------------------------------- ++TESTS_EXPECTED_IN_LOOP=5 ++function assign_new_gfid { ++ brickpath=$1 ++ filename=$2 ++ gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/$filename)) ++ gfid_shard=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/.shard/$gfid.1)) ++ ++ TEST rm $brickpath/.glusterfs/${gfid_shard:0:2}/${gfid_shard:2:2}/$gfid_shard ++ TEST setfattr -x trusted.gfid $brickpath/.shard/$gfid.1 ++ new_gfid=$(get_random_gfid) ++ new_gfid_str=$(gf_gfid_xattr_to_str $new_gfid) ++ TEST setfattr -n trusted.gfid -v $new_gfid $brickpath/.shard/$gfid.1 ++ TEST mkdir -p $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2} ++ TEST ln $brickpath/.shard/$gfid.1 $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}/$new_gfid_str ++} ++assign_new_gfid $B0/$V0"1" file3 ++assign_new_gfid $B0/$V0"2" file3 ++ ++#------------------------------------------------------------------------------- ++gfid_f4=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file4)) ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1 ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1 ++ ++#------------------------------------------------------------------------------- ++#Add entry to xattrop dir on first brick and check for split-brain. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++ ++gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1 ++ ++gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1 ++ ++gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3)) ++gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1 ++ ++gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1 ++ ++#------------------------------------------------------------------------------- ++#gfid split-brain won't show up in split-brain count. ++EXPECT "3" afr_get_split_brain_count $V0 ++EXPECT_NOT "^0$" get_pending_heal_count $V0 ++ ++#Resolve split-brains ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file1 ++GFIDSTR="gfid:$gfid_f2_shard1" ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f3.1 ++TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f4.1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index eef7fd2..32127c6 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2250,7 +2250,8 @@ afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this, + if ((spb_choice >= 0) && + (AFR_COUNT(success_replies, child_count) == child_count)) { + *read_subvol = spb_choice; +- } else if (!priv->quorum_count) { ++ } else if (!priv->quorum_count || ++ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) { + *read_subvol = afr_first_up_child(frame, this); + } else if (priv->quorum_count && + afr_has_quorum(data_readable, this, NULL)) { +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 2e2ef5d..16d557b 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1472,7 +1472,8 @@ int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + shard_local_t *local = NULL; + + this->itable = loc->inode->table; +- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { ++ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && ++ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { + SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch b/SOURCES/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch new file mode 100644 index 0000000..459462d --- /dev/null +++ b/SOURCES/0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch @@ -0,0 +1,60 @@ +From de31f2b0cb09a59941892c9981cb8a8b3aced9ec Mon Sep 17 00:00:00 2001 +From: kshithijiyer <kshithij.ki@gmail.com> +Date: Tue, 24 Dec 2019 13:02:21 +0530 +Subject: [PATCH 407/449] [geo-rep] Improving help message in + schedule_georep.py.in + +SLAVE positional argument doesn't provide a clear +picture of what it is when compared to mastervol and slavevol +in schedule_georep.py.in. 
It would be better if we change it to something like "Slave hostname (<username>@SLAVEHOST or SLAVEHOST)" + +Present: +---------- +positional arguments: + mastervol Master Volume Name + SLAVE SLAVEHOST or root@SLAVEHOST or user@SLAVEHOST + slavevol Slave Volume Name + +Suggested: +----------- +positional arguments: + mastervol Master Volume Name + SLAVE Slave hostname (<username>@SLAVEHOST or SLAVEHOST) + slavevol Slave Volume Name + +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/23919/ + >fixes: bz#1786276 + >Change-Id: I73d52247997d623f77d55e51cbb6eccc08eb95ff + >Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> + +BUG: 1787994 +Change-Id: I73d52247997d623f77d55e51cbb6eccc08eb95ff +Signed-off-by: kshithijiyer <kshithij.ki@gmail.com> +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202454 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/geo-rep/schedule_georep.py.in | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in +index f29ae02..ac93716 100644 +--- a/extras/geo-rep/schedule_georep.py.in ++++ b/extras/geo-rep/schedule_georep.py.in +@@ -459,8 +459,8 @@ if __name__ == "__main__": + description=__doc__) + parser.add_argument("mastervol", help="Master Volume Name") + parser.add_argument("slave", +- help="SLAVEHOST or root@SLAVEHOST " +- "or user@SLAVEHOST", ++ help="Slave hostname " ++ "(<username>@SLAVEHOST or SLAVEHOST)", + metavar="SLAVE") + parser.add_argument("slavevol", help="Slave Volume Name") + parser.add_argument("--interval", help="Interval in Seconds. " +-- +1.8.3.1 + diff --git a/SOURCES/0408-geo-rep-Fix-ssh-port-validation.patch b/SOURCES/0408-geo-rep-Fix-ssh-port-validation.patch new file mode 100644 index 0000000..9fad8d1 --- /dev/null +++ b/SOURCES/0408-geo-rep-Fix-ssh-port-validation.patch @@ -0,0 +1,107 @@ +From 07ab5a460da007fc3809b1a943614d1c7f5fcfef Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Fri, 17 Jan 2020 11:03:46 +0000 +Subject: [PATCH 408/449] geo-rep: Fix ssh-port validation + +If a non-standard ssh-port is used, Geo-rep can be configured to use that ssh port +via the config option; the value should be in the allowed port range and non-negative. + +At present it can accept a negative value or one outside the allowed port range, which is incorrect. + +Many Linux kernels use the port range 32768 to 61000. +IANA suggests it should be in the range 1 to 2^16 - 1, so keeping the same. + +$ gluster volume geo-replication master 127.0.0.1::slave config ssh-port -22 +geo-replication config updated successfully +$ gluster volume geo-replication master 127.0.0.1::slave config ssh-port 22222222 +geo-replication config updated successfully + +This patch fixes the above issue and adds a few validations around this +in the test cases.
+Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24035/ + >Change-Id: I9875ab3f00d7257370fbac6f5ed4356d2fed3f3c + >Fixes: bz#1792276 + >Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1796814 +Change-Id: I9875ab3f00d7257370fbac6f5ed4356d2fed3f3c +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202453 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/gsyncd.conf.in | 4 +++- + tests/00-geo-rep/00-georep-verify-non-root-setup.t | 16 ++++++++++++++++ + tests/00-geo-rep/georep-basic-dr-rsync.t | 13 +++++++++++++ + 3 files changed, 32 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 9155cd8..11e57fd 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -266,7 +266,9 @@ allowed_values=ERROR,INFO,WARNING,DEBUG + + [ssh-port] + value=22 +-validation=int ++validation=minmax ++min=1 ++max=65535 + help=Set SSH port + type=int + +diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +index c9fd8b2..12f0c01 100644 +--- a/tests/00-geo-rep/00-georep-verify-non-root-setup.t ++++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t +@@ -223,6 +223,22 @@ TEST $GEOREP_CLI $master $slave_url resume + #Validate failure of volume stop when geo-rep is running + TEST ! $CLI volume stop $GMV0 + ++#Negative test for ssh-port ++#Port should be integer and between 1-65535 range ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port -22 ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port abc ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 6875943 ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 4.5 ++ ++TEST ! $GEOREP_CLI $master $slave_url config ssh-port 22a ++ ++#Config Set ssh-port to validate int validation ++TEST $GEOREP_CLI $master $slave config ssh-port 22 ++ + #Hybrid directory rename test BZ#1763439 + TEST $GEOREP_CLI $master $slave_url config change_detector xsync + mkdir ${master_mnt}/dir1 +diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t +index b6fbf18..d785aa5 100644 +--- a/tests/00-geo-rep/georep-basic-dr-rsync.t ++++ b/tests/00-geo-rep/georep-basic-dr-rsync.t +@@ -71,6 +71,19 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created" + #Config gluster-command-dir + TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + ++#Negative test for ssh-port ++#Port should be integer and between 1-65535 range ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port -22 ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port abc ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port 6875943 ++ ++TEST ! $GEOREP_CLI $master $slave config ssh-port 4.5 ++ ++TEST ! 
$GEOREP_CLI $master $slave config ssh-port 22a ++ + #Config Set ssh-port to validate int validation + TEST $GEOREP_CLI $master $slave config ssh-port 22 + +-- +1.8.3.1 + diff --git a/SOURCES/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch b/SOURCES/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch new file mode 100644 index 0000000..ca1c25a --- /dev/null +++ b/SOURCES/0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch @@ -0,0 +1,52 @@ +From a92b4f6373cb18544325436cf86abfebd6780d79 Mon Sep 17 00:00:00 2001 +From: Homma <homma@allworks.co.jp> +Date: Fri, 5 Jul 2019 16:10:41 +0530 +Subject: [PATCH 409/449] system/posix-acl: update ctx only if iatt is non-NULL + +We need to safe-guard against possible zero'ing out of iatt +structure in acl ctx, which can cause many issues. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/23003/ +> fixes: 1668286 +> Change-Id: Ie81a57d7453a6624078de3be8c0845bf4d432773 +> Signed-off-by: Amar Tumballi <amarts@redhat.com> + +BUG: 1781649 +Change-Id: I655b61551d30215b9f23cafc3ef9a5c0d98a43d0 +Signed-off-by: Raghavendra M <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202446 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/system/posix-acl/src/posix-acl.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xlators/system/posix-acl/src/posix-acl.c b/xlators/system/posix-acl/src/posix-acl.c +index 38e48b8..c6ba281 100644 +--- a/xlators/system/posix-acl/src/posix-acl.c ++++ b/xlators/system/posix-acl/src/posix-acl.c +@@ -875,6 +875,13 @@ posix_acl_ctx_update(inode_t *inode, xlator_t *this, struct iatt *buf, + int ret = 0; + int i = 0; + ++ if (!buf || !buf->ia_ctime) { ++ /* No need to update ctx if buf is empty */ ++ gf_log_callingfn(this->name, GF_LOG_DEBUG, "iatt struct is empty (%d)", ++ fop); ++ goto out; ++ } ++ + LOCK(&inode->lock); + { + ctx = __posix_acl_ctx_get(inode, this, _gf_true); +@@ -928,6 +935,7 @@ posix_acl_ctx_update(inode_t *inode, xlator_t *this, struct iatt *buf, + } + unlock: + UNLOCK(&inode->lock); ++out: + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch b/SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch new file mode 100644 index 0000000..97bdc78 --- /dev/null +++ b/SOURCES/0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch @@ -0,0 +1,249 @@ +From 2b2eb846c49caba13ab92ec66af20292e7780fc1 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Tue, 11 Feb 2020 14:34:48 +0530 +Subject: [PATCH 410/449] afr: prevent spurious entry heals leading to gfid + split-brain + +Problem: +In a hyperconverged setup with granular-entry-heal enabled, if a file is +recreated while one of the bricks is down, and an index heal is triggered +(with the brick still down), entry-self heal was doing a spurious heal +with just the 2 good bricks. It was doing a post-op leading to removal +of the filename from .glusterfs/indices/entry-changes as well as +erroneous setting of afr xattrs on the parent. When the brick came up, +the xattrs were cleared, resulting in the renamed file not getting +healed and leading to gfid split-brain and EIO on the mount. + +Fix: +Proceed with entry heal only when shd can connect to all bricks of the replica, +just like in data and metadata heal. 
+ +BUG: 1804164 + +> Upstream patch:https://review.gluster.org/#/c/glusterfs/+/24109/ +> fixes: bz#1801624 +> Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +Change-Id: I23f57e543cff1e3f35eb8dbc60a2babfae6838c7 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202395 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bug-1433571-undo-pending-only-on-up-bricks.t | 18 ++----- + tests/bugs/replicate/bug-1801624-entry-heal.t | 58 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 8 +-- + xlators/cluster/afr/src/afr-self-heal-entry.c | 6 +-- + xlators/cluster/afr/src/afr-self-heal-name.c | 2 +- + xlators/cluster/afr/src/afr-self-heal.h | 2 - + 7 files changed, 69 insertions(+), 29 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1801624-entry-heal.t + +diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +index 0767f47..10ce013 100644 +--- a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t ++++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t +@@ -49,25 +49,15 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-#Kill brick 0 and turn on the client side heal and do ls to trigger the heal. +-#The pending xattrs on bricks 1 & 2 should have pending entry on brick 0. +-TEST kill_brick $V0 $H0 $B0/${V0}0 ++# We were killing one brick and checking that entry heal does not reset the ++# pending xattrs for the down brick. Now that we need all bricks to be up for ++# entry heal, I'm removing that test from the .t ++ + TEST $CLI volume set $V0 cluster.data-self-heal on + TEST $CLI volume set $V0 cluster.metadata-self-heal on + TEST $CLI volume set $V0 cluster.entry-self-heal on + + TEST ls $M0 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1 +-EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 +-EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2 +- +-#Bring back all the bricks and trigger the heal again by doing ls. Now the +-#pending xattrs on all the bricks should be 0. +-TEST $CLI volume start $V0 force +-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +-TEST ls $M0 +- + TEST cat $M0/f1 + TEST cat $M0/f2 + TEST cat $M0/f3 +diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t +new file mode 100644 +index 0000000..94b4651 +--- /dev/null ++++ b/tests/bugs/replicate/bug-1801624-entry-heal.t +@@ -0,0 +1,58 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} ++TEST $CLI volume set $V0 heal-timeout 5 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 granular-entry-heal enable ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Re-create the file when a brick is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST rm $M0/FILE ++echo "New Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0 ++ ++# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices. ++$CLI volume heal $V0 # CLI will fail but heal is launched anyway. ++TEST sleep 5 # give index heal a chance to do one run. ++brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/) ++brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/) ++TEST [ $brick0_pending -eq "000000000000000000000002" ] ++TEST [ $brick2_pending -eq "000000000000000000000002" ] ++EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/ ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++$CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values. 
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++TEST cat $M0/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 32127c6..5806556 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -6629,7 +6629,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto data_unlock; + ret = __afr_selfheal_data_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +@@ -6646,7 +6646,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) + ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, + LLONG_MAX - 1, 0, locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) ++ if (ret < priv->child_count) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare( + heal_frame, this, inode, locked_on, sources, sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 81ef38a..ce1ea50 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1575,7 +1575,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + char *accused = NULL; /* Accused others without any self-accusal */ + char *pending = NULL; /* Have pending operations on others */ + char *self_accused = NULL; /* Accused itself */ +- int min_participants = -1; + + priv = this->private; + +@@ -1599,12 +1598,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, + } + } + +- if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { +- min_participants = priv->child_count; +- } else { +- min_participants = AFR_SH_MIN_PARTICIPANTS; +- } +- if (afr_success_count(replies, priv->child_count) < min_participants) { ++ if (afr_success_count(replies, priv->child_count) < priv->child_count) { + /* Treat this just like locks not being acquired */ + return -ENOTCONN; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 3ce882e..40be898 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -597,7 +597,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes " +@@ -991,7 +991,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, + ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, + data_lock); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +@@ -1115,7 +1115,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) + ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, + 
NULL, locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + gf_msg_debug(this->name, 0, + "%s: Skipping " + "entry self-heal as only %d sub-volumes could " +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 36640b5..7d4f208 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, + locked_on); + { +- if (ret < AFR_SH_MIN_PARTICIPANTS) { ++ if (ret < priv->child_count) { + ret = -ENOTCONN; + goto unlock; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 6555ec5..8234cec 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -11,8 +11,6 @@ + #ifndef _AFR_SELFHEAL_H + #define _AFR_SELFHEAL_H + +-#define AFR_SH_MIN_PARTICIPANTS 2 +- + /* Perform fop on all UP subvolumes and wait for all callbacks to return */ + + #define AFR_ONALL(frame, rfn, fop, args...) \ +-- +1.8.3.1 + diff --git a/SOURCES/0411-tools-glusterfind-validate-session-name.patch b/SOURCES/0411-tools-glusterfind-validate-session-name.patch new file mode 100644 index 0000000..db633f2 --- /dev/null +++ b/SOURCES/0411-tools-glusterfind-validate-session-name.patch @@ -0,0 +1,116 @@ +From 854defb4ff5e0d53f51545d20796aff662f9850f Mon Sep 17 00:00:00 2001 +From: Saravanakumar Arumugam <sarumuga@redhat.com> +Date: Thu, 9 Jul 2015 15:56:28 +0530 +Subject: [PATCH 411/449] tools/glusterfind : validate session name + +Validate a session name (during create) for the following: +1. Minimum 2 characters in length. +2. Maximum 256 characters. +3. No special characters apart from underscore, hyphen allowed. + +Also, validate the volume (except while using glusterfind list).
+ +>Change-Id: I1b1e64e218f93d0a531d3cf69fc2ce7e2ed11d01 +>BUG: 1241494 +>Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com> +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/11602/ + +BUG: 1234220 +Change-Id: I1b1e64e218f93d0a531d3cf69fc2ce7e2ed11d01 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202469 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tools/glusterfind/src/main.py | 50 ++++++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py +index 5ca1fec..4b5466d 100644 +--- a/tools/glusterfind/src/main.py ++++ b/tools/glusterfind/src/main.py +@@ -23,6 +23,7 @@ import tempfile + import signal + from datetime import datetime + import codecs ++import re + + from utils import execute, is_host_local, mkdirp, fail + from utils import setup_logger, human_time, handle_rm_error +@@ -520,11 +521,8 @@ def write_output(outfile, outfilemerger, field_separator): + else: + gfind_write(f, row[0], field_separator, p_rep) + +-def mode_create(session_dir, args): +- logger.debug("Init is called - Session: %s, Volume: %s" +- % (args.session, args.volume)) +- +- cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] ++def validate_volume(volume): ++ cmd = ["gluster", 'volume', 'info', volume, "--xml"] + _, data, _ = execute(cmd, + exit_msg="Failed to Run Gluster Volume Info", + logger=logger) +@@ -532,11 +530,42 @@ def mode_create(session_dir, args): + tree = etree.fromstring(data) + statusStr = tree.find('volInfo/volumes/volume/statusStr').text + except (ParseError, AttributeError) as e: +- fail("Invalid Volume: %s" % e, logger=logger) +- ++ fail("Invalid Volume: Check the Volume name! %s" % e) + if statusStr != "Started": +- fail("Volume %s is not online" % args.volume, logger=logger) ++ fail("Volume %s is not online" % volume) ++ ++# The rules for a valid session name. ++SESSION_NAME_RULES = { ++ 'min_length': 2, ++ 'max_length': 256, # same as maximum volume length ++ # Specifies all alphanumeric characters, underscore, hyphen. 
++ 'valid_chars': r'0-9a-zA-Z_-', ++} ++ ++ ++# checks valid session name, fail otherwise ++def validate_session_name(session): ++ # Check for minimum length ++ if len(session) < SESSION_NAME_RULES['min_length']: ++ fail('session_name must be at least ' + ++ str(SESSION_NAME_RULES['min_length']) + ' characters long.') ++ # Check for maximum length ++ if len(session) > SESSION_NAME_RULES['max_length']: ++ fail('session_name must not exceed ' + ++ str(SESSION_NAME_RULES['max_length']) + ' characters length.') ++ ++ # Matches strings composed entirely of characters specified within ++ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] + ++ ']+$', session): ++ fail('Session name can only contain these characters: ' + ++ SESSION_NAME_RULES['valid_chars']) ++ ++ ++def mode_create(session_dir, args): ++ validate_session_name(args.session) + ++ logger.debug("Init is called - Session: %s, Volume: %s" ++ % (args.session, args.volume)) + mkdirp(session_dir, exit_on_err=True, logger=logger) + mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, + logger=logger) +@@ -850,6 +879,11 @@ def main(): + args.mode not in ["create", "list", "query"]: + fail("Invalid session %s" % args.session) + ++ # volume involved, validate the volume first ++ if args.mode not in ["list"]: ++ validate_volume(args.volume) ++ ++ + # "default" is a system defined session name + if args.mode in ["create", "post", "pre", "delete"] and \ + args.session == "default": +-- +1.8.3.1 + diff --git a/SOURCES/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch b/SOURCES/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch new file mode 100644 index 0000000..865fddf --- /dev/null +++ b/SOURCES/0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch @@ -0,0 +1,46 @@ +From 0769c5ddc78ea37b9a43ac35dd71ec8cea4b8da8 Mon Sep 17 00:00:00 2001 +From: yinkui <13965432176@163.com> +Date: Fri, 16 Aug 2019 10:15:07 +0800 +Subject: [PATCH 412/449] gluster-smb:add smb parameter when access gluster by + cifs + +Backport of https://review.gluster.org/23240 + +Change-Id: I9ff54f2ca6f86bb5b2f4740485a0159e1fd7785f +BUG: 1783232 +Signed-off-by: yinkui <13965432176@163.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202472 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/hook-scripts/set/post/S30samba-set.sh | 1 + + extras/hook-scripts/start/post/S30samba-start.sh | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh +index d2a62d3..e73f00f 100755 +--- a/extras/hook-scripts/set/post/S30samba-set.sh ++++ b/extras/hook-scripts/set/post/S30samba-set.sh +@@ -90,6 +90,7 @@ function add_samba_share () { + STRING+="path = /\n" + STRING+="read only = no\n" + STRING+="guest ok = yes\n" ++ STRING+="kernel share modes = no\n" + printf "$STRING" >> ${CONFIGFILE} + } + +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index 2854bdd..0d5a5ed 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -89,6 +89,7 @@ function add_samba_share () { + STRING+="path = /\n" + STRING+="read only = no\n" + STRING+="guest ok = yes\n" ++ STRING+="kernel share modes = no\n" + printf "$STRING" >> "${CONFIGFILE}" + } + +-- +1.8.3.1 + diff --git a/SOURCES/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch 
b/SOURCES/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch new file mode 100644 index 0000000..1ff6348 --- /dev/null +++ b/SOURCES/0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch @@ -0,0 +1,46 @@ +From aec3dd00fa76547316fddd07e6ded428d945986c Mon Sep 17 00:00:00 2001 +From: Anoop C S <anoopcs@redhat.com> +Date: Fri, 22 Nov 2019 17:36:55 +0530 +Subject: [PATCH 413/449] extras/hooks: Remove smb.conf parameter allowing + guest access + +Backport of https://review.gluster.org/23745 + +Change-Id: I88f494f16153d27ab6e2f2faf4d557e075671b10 +BUG: 1775637 +Signed-off-by: Anoop C S <anoopcs@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202473 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/hook-scripts/set/post/S30samba-set.sh | 1 - + extras/hook-scripts/start/post/S30samba-start.sh | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh +index e73f00f..854f131 100755 +--- a/extras/hook-scripts/set/post/S30samba-set.sh ++++ b/extras/hook-scripts/set/post/S30samba-set.sh +@@ -89,7 +89,6 @@ function add_samba_share () { + STRING+="glusterfs:loglevel = 7\n" + STRING+="path = /\n" + STRING+="read only = no\n" +- STRING+="guest ok = yes\n" + STRING+="kernel share modes = no\n" + printf "$STRING" >> ${CONFIGFILE} + } +diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh +index 0d5a5ed..cac0cbf 100755 +--- a/extras/hook-scripts/start/post/S30samba-start.sh ++++ b/extras/hook-scripts/start/post/S30samba-start.sh +@@ -88,7 +88,6 @@ function add_samba_share () { + STRING+="glusterfs:loglevel = 7\n" + STRING+="path = /\n" + STRING+="read only = no\n" +- STRING+="guest ok = yes\n" + STRING+="kernel share modes = no\n" + printf "$STRING" >> "${CONFIGFILE}" + } +-- +1.8.3.1 + diff --git a/SOURCES/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch b/SOURCES/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch new file mode 100644 index 0000000..67b71dd --- /dev/null +++ b/SOURCES/0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch @@ -0,0 +1,62 @@ +From 5b549cbf3f1873054c6d187b09aa9f9313971b1f Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Mon, 18 Mar 2019 20:47:54 +0800 +Subject: [PATCH 414/449] cluster-syncop: avoid duplicate unlock of + inodelk/entrylk + +When using ec, there are many spam messages in brick and client +log files. + +When shd does entry heal, it takes a lock on a directory using +cluster_tiebreaker_inodelk(). If it does not get the lock on all +the bricks because other clients have got locks on some bricks, +it will unlock the locks on those bricks which it got and then +will try blocking locks (if any one of the previous attempts was successful). + +The problem comes here. In case we do not get locks on all the +required bricks, we are sending the unlock request twice on those +bricks where we got the locks.
+ +BUG: 1750211 +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/22377/ +> Change-Id: Ib164d29ebb071f620a4ca9679c4345ef7c88512a +> Updates: bz#1689920 +> Signed-off-by: Kinglong Mee <mijinlong@open-fs.com> + +Change-Id: I1647548ba75fdd27fd4e20dec08db67774f43375 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202477 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/cluster-syncop.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c +index 5a08f26..6ee89dd 100644 +--- a/libglusterfs/src/cluster-syncop.c ++++ b/libglusterfs/src/cluster-syncop.c +@@ -1203,6 +1203,10 @@ cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on, + if (num_success) { + FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, + inodelk, dom, &loc, F_SETLKW, &flock, NULL); ++ } else { ++ loc_wipe(&loc); ++ memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +@@ -1244,7 +1248,9 @@ cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on, + entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, + NULL); + } else { ++ loc_wipe(&loc); + memset(locked_on, 0, numsubvols); ++ return 0; + } + break; + } +-- +1.8.3.1 + diff --git a/SOURCES/0415-dht-Fix-stale-layout-and-create-issue.patch b/SOURCES/0415-dht-Fix-stale-layout-and-create-issue.patch new file mode 100644 index 0000000..476a8cc --- /dev/null +++ b/SOURCES/0415-dht-Fix-stale-layout-and-create-issue.patch @@ -0,0 +1,523 @@ +From ba23e6d8f4eff11a228816149a8a1ccd6df41146 Mon Sep 17 00:00:00 2001 +From: Susant Palai <spalai@redhat.com> +Date: Fri, 27 Dec 2019 12:06:19 +0530 +Subject: [PATCH 415/449] dht: Fix stale-layout and create issue + +Problem: With lookup-optimize set to on by default, a client with a +stale layout can create a new file on a wrong subvol. This will lead to +possible duplicate files if two different clients attempt to create the +same file with two different layouts. + +Solution: Send the in-memory layout to be cross-checked at posix before +committing a "create". In case of a mismatch, sync the client layout with +that of the server and attempt the create fop one more time. + +test: Manual, testcase (attached) + +(Backport of https://review.gluster.org/#/c/glusterfs/+/23927/) + +BUG: 1748865 +Change-Id: I6c82c97418654ae8eb3b81ab65f1247aa4002ceb +Signed-off-by: Susant Palai <spalai@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202465 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/distribute/bug-1786679.t | 69 +++++++++++ + xlators/cluster/dht/src/dht-common.c | 147 ++++++++++++++++++++--- + xlators/cluster/dht/src/dht-common.h | 6 + + xlators/protocol/client/src/client-rpc-fops_v2.c | 9 +- + xlators/storage/posix/src/posix-entry-ops.c | 29 ++++- + xlators/storage/posix/src/posix-helpers.c | 76 ++++++++++++ + xlators/storage/posix/src/posix.h | 4 + + 7 files changed, 321 insertions(+), 19 deletions(-) + create mode 100755 tests/bugs/distribute/bug-1786679.t + +diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t +new file mode 100755 +index 0000000..219ce51 +--- /dev/null +++ b/tests/bugs/distribute/bug-1786679.t +@@ -0,0 +1,69 @@ ++#!/bin/bash ++ ++SCRIPT_TIMEOUT=250 ++ ++. 
$(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../dht.rc ++ ++ ++# create 2 subvols ++# create a dir ++# create a file ++# change layout ++# remove the file ++# execute create from a different mount ++# Without the patch, the file will be present on both of the bricks ++ ++cleanup ++ ++function get_layout () { ++ ++layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'` ++ ++echo $layout ++ ++} ++ ++function set_layout() ++{ ++ setfattr -n "trusted.glusterfs.dht" -v $1 $2 ++} ++ ++TEST glusterd ++TEST pidof glusterd ++ ++BRICK1=$B0/${V0}-0 ++BRICK2=$B0/${V0}-1 ++ ++TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2 ++TEST $CLI volume start $V0 ++ ++# Mount FUSE and create symlink ++TEST glusterfs -s $H0 --volfile-id $V0 $M0 ++TEST mkdir $M0/dir ++TEST touch $M0/dir/file ++TEST ! stat "$BRICK1/dir/file" ++TEST stat "$BRICK2/dir/file" ++ ++layout1="$(get_layout "$BRICK1/dir")" ++layout2="$(get_layout "$BRICK2/dir")" ++ ++TEST set_layout $layout1 "$BRICK2/dir" ++TEST set_layout $layout2 "$BRICK1/dir" ++ ++TEST rm $M0/dir/file -f ++TEST gluster v set $V0 client-log-level DEBUG ++ ++#Without the patch in place, this client will create the file in $BRICK2 ++#which will lead to two files being on both the bricks when a new client ++#create the file with the same name ++TEST touch $M0/dir/file ++ ++TEST glusterfs -s $H0 --volfile-id $V0 $M1 ++TEST touch $M1/dir/file ++ ++TEST stat "$BRICK1/dir/file" ++TEST ! stat "$BRICK2/dir/file" ++ ++cleanup +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 7890e7a..6aa18f3 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -8262,6 +8262,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + xlator_t *prev = NULL; + int ret = -1; + dht_local_t *local = NULL; ++ gf_boolean_t parent_layout_changed = _gf_false; ++ char pgfid[GF_UUID_BUF_SIZE] = {0}; ++ xlator_t *subvol = NULL; ++ ++ local = frame->local; + + local = frame->local; + if (!local) { +@@ -8270,8 +8275,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + goto out; + } + +- if (op_ret == -1) ++ if (op_ret == -1) { ++ local->op_errno = op_errno; ++ parent_layout_changed = (xdata && ++ dict_get(xdata, GF_PREOP_CHECK_FAILED)) ++ ? _gf_true ++ : _gf_false; ++ ++ if (parent_layout_changed) { ++ if (local && local->lock[0].layout.parent_layout.locks) { ++ /* Returning failure as the layout could not be fixed even under ++ * the lock */ ++ goto out; ++ } ++ ++ gf_uuid_unparse(local->loc.parent->gfid, pgfid); ++ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "create (%s/%s) (path: %s): parent layout " ++ "changed. Attempting a layout refresh and then a " ++ "retry", ++ pgfid, local->loc.name, local->loc.path); ++ ++ /* ++ dht_refresh_layout needs directory info in local->loc.Hence, ++ storing the parent_loc in local->loc and storing the create ++ context in local->loc2. We will restore this information in ++ dht_creation_do. 
++ */ ++ ++ loc_wipe(&local->loc2); ++ ++ ret = loc_copy(&local->loc2, &local->loc); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, ++ "loc_copy failed %s", local->loc.path); ++ ++ goto out; ++ } ++ ++ loc_wipe(&local->loc); ++ ++ ret = dht_build_parent_loc(this, &local->loc, &local->loc2, ++ &op_errno); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, ++ "parent loc build failed"); ++ goto out; ++ } ++ ++ subvol = dht_subvol_get_hashed(this, &local->loc2); ++ ++ ret = dht_create_lock(frame, subvol); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, ++ "locking parent failed"); ++ goto out; ++ } ++ ++ return 0; ++ } ++ + goto out; ++ } + + prev = cookie; + +@@ -8392,6 +8458,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); + ++ dht_set_parent_layout_in_dict(loc, this, local); ++ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8400,10 +8468,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + avail_subvol = dht_free_disk_available_subvol(this, subvol, local); + + if (avail_subvol != subvol) { +- local->params = dict_ref(params); +- local->flags = flags; +- local->mode = mode; +- local->umask = umask; + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; + +@@ -8419,6 +8483,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, + subvol->name); + ++ dht_set_parent_layout_in_dict(loc, this, local); ++ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8680,6 +8746,60 @@ err: + } + + int ++dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local) ++{ ++ dht_conf_t *conf = this->private; ++ dht_layout_t *parent_layout = NULL; ++ int *parent_disk_layout = NULL; ++ xlator_t *hashed_subvol = NULL; ++ char pgfid[GF_UUID_BUF_SIZE] = {0}; ++ int ret = 0; ++ ++ gf_uuid_unparse(loc->parent->gfid, pgfid); ++ ++ parent_layout = dht_layout_get(this, loc->parent); ++ hashed_subvol = dht_subvol_get_hashed(this, loc); ++ ++ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, ++ &parent_disk_layout); ++ if (ret == -1) { ++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno, ++ DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "%s (%s/%s) (path: %s): " ++ "extracting in-memory layout of parent failed. ", ++ gf_fop_list[local->fop], pgfid, loc->name, loc->path); ++ goto err; ++ } ++ ++ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY, ++ conf->xattr_name); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno, ++ DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "%s (%s/%s) (path: %s): " ++ "setting %s key in params dictionary failed. ", ++ gf_fop_list[local->fop], pgfid, loc->name, loc->path, ++ GF_PREOP_PARENT_KEY); ++ goto err; ++ } ++ ++ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, ++ 4 * 4); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, local->op_errno, ++ DHT_MSG_PARENT_LAYOUT_CHANGED, ++ "%s (%s/%s) (path: %s): " ++ "setting parent-layout in params dictionary failed. 
", ++ gf_fop_list[local->fop], pgfid, loc->name, loc->path); ++ goto err; ++ } ++ ++err: ++ dht_layout_unref(this, parent_layout); ++ return ret; ++} ++ ++int + dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) + { +@@ -8705,6 +8825,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + goto err; + } + ++ local->params = dict_ref(params); ++ local->flags = flags; ++ local->mode = mode; ++ local->umask = umask; ++ + if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) { + gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO, + "creating %s on %s (got create on %s)", local->loc.path, +@@ -8720,10 +8845,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + + if (hashed_subvol && (hashed_subvol != subvol)) { + /* Create the linkto file and then the data file */ +- local->params = dict_ref(params); +- local->flags = flags; +- local->mode = mode; +- local->umask = umask; + local->cached_subvol = subvol; + local->hashed_subvol = hashed_subvol; + +@@ -8736,6 +8857,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + * file as we expect a lookup everywhere if there are problems + * with the parent layout + */ ++ ++ dht_set_parent_layout_in_dict(loc, this, local); ++ + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, &local->loc, flags, mode, umask, + fd, params); +@@ -8787,11 +8911,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + goto err; + } + +- local->params = dict_ref(params); +- local->flags = flags; +- local->mode = mode; +- local->umask = umask; +- + loc_wipe(&local->loc); + + ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 8e65111..1b3e826 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1549,4 +1549,10 @@ dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno); + int + dht_dir_layout_error_check(xlator_t *this, inode_t *inode); + ++int32_t ++dht_create_lock(call_frame_t *frame, xlator_t *subvol); ++ ++int ++dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); ++ + #endif /* _DHT_H */ +diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c +index 2673b6e..613dda8 100644 +--- a/xlators/protocol/client/src/client-rpc-fops_v2.c ++++ b/xlators/protocol/client/src/client-rpc-fops_v2.c +@@ -2094,11 +2094,12 @@ client4_0_create_cbk(struct rpc_req *req, struct iovec *iov, int count, + goto out; + } + ++ ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, ++ local, &xdata); ++ if (ret < 0) ++ goto out; ++ + if (-1 != rsp.op_ret) { +- ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, +- local, &xdata); +- if (ret < 0) +- goto out; + ret = client_add_fd_to_saved_fds(frame->this, fd, &local->loc, + local->flags, rsp.fd, 0); + if (ret) { +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index bea0bbf..65650b3 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -2070,6 +2070,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + mode_t mode_bit = 0; + 
++ dict_t *xdata_rsp = dict_ref(xdata); ++ + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); +@@ -2118,6 +2120,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + was_present = 0; + } + ++ if (!was_present) { ++ if (posix_is_layout_stale(xdata, par_path, this)) { ++ op_ret = -1; ++ op_errno = EIO; ++ if (!xdata_rsp) { ++ xdata_rsp = dict_new(); ++ if (!xdata_rsp) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ } ++ ++ if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == ++ -1) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, ++ "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); ++ } ++ ++ goto out; ++ } ++ } ++ + if (priv->o_direct) + _flags |= O_DIRECT; + +@@ -2239,7 +2263,10 @@ out: + + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, + (loc) ? loc->inode : NULL, &stbuf, &preparent, +- &postparent, xdata); ++ &postparent, xdata_rsp); ++ ++ if (xdata_rsp) ++ dict_unref(xdata_rsp); + + return 0; + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 35dd3b6..2c27d22 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -3559,3 +3559,79 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) + } + } + } ++ ++gf_boolean_t ++posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) ++{ ++ int op_ret = 0; ++ ssize_t size = 0; ++ char value_buf[4096] = { ++ 0, ++ }; ++ gf_boolean_t have_val = _gf_false; ++ data_t *arg_data = NULL; ++ char *xattr_name = NULL; ++ gf_boolean_t is_stale = _gf_false; ++ ++ op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); ++ if (xattr_name == NULL) { ++ op_ret = 0; ++ goto out; ++ } ++ ++ arg_data = dict_get(xdata, xattr_name); ++ if (!arg_data) { ++ op_ret = 0; ++ goto out; ++ } ++ ++ size = sys_lgetxattr(par_path, xattr_name, value_buf, ++ sizeof(value_buf) - 1); ++ ++ if (size >= 0) { ++ have_val = _gf_true; ++ } else { ++ if (errno == ERANGE) { ++ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, ++ "getxattr on key (%s) path (%s) failed due to" ++ " buffer overflow", ++ xattr_name, par_path); ++ size = sys_lgetxattr(par_path, xattr_name, NULL, 0); ++ } ++ if (size < 0) { ++ op_ret = -1; ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, ++ "getxattr on key (%s) failed, path : %s", xattr_name, ++ par_path); ++ goto out; ++ } ++ } ++ ++ if (!have_val) { ++ size = sys_lgetxattr(par_path, xattr_name, value_buf, size); ++ if (size < 0) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, ++ "getxattr on key (%s) failed (%s)", xattr_name, ++ strerror(errno)); ++ goto out; ++ } ++ } ++ ++ if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { ++ gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, ++ "failing preop as on-disk xattr value differs from argument " ++ "value for key %s", ++ xattr_name); ++ op_ret = -1; ++ } ++ ++out: ++ dict_del_sizen(xdata, xattr_name); ++ dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); ++ ++ if (op_ret == -1) { ++ is_stale = _gf_true; ++ } ++ ++ return is_stale; ++} +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index dd51062..ac9d83c 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -671,4 +671,8 @@ posix_spawn_ctx_janitor_thread(xlator_t *this); + + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); ++ 
++gf_boolean_t ++posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); ++ + #endif /* _POSIX_H */ +-- +1.8.3.1 + diff --git a/SOURCES/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch b/SOURCES/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch new file mode 100644 index 0000000..1954e6a --- /dev/null +++ b/SOURCES/0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch @@ -0,0 +1,72 @@ +From 63cfdd987b1dfbf97486f0f884380faee0ae25d0 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Wed, 4 Sep 2019 11:27:30 +0530 +Subject: [PATCH 416/449] tests: fix spurious failure of + bug-1402841.t-mt-dir-scan-race.t + +Upstream patch: https://review.gluster.org/23352 + +Problem: +Since commit 600ba94183333c4af9b4a09616690994fd528478, shd starts +healing as soon as it is toggled from disabled to enabled. This was +causing the following line in the .t to fail on a 'fast' machine (always +on my laptop and sometimes on the jenkins slaves). + +EXPECT_NOT "^0$" get_pending_heal_count $V0 + +because by the time shd was disabled, the heal was already completed. + +Fix: +Increase the no. of files to be healed and make it a variable called +FILE_COUNT, should we need to bump it up further because the machines +become even faster. Also created pending metadata heals to increase the +time taken to heal a file. + +>fixes: bz#1748744 +>Change-Id: I5a26b08e45b8c19bce3c01ce67bdcc28ed48198d +Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +BUG: 1844359 +Change-Id: Ie3676c6c2c27e7574b958d2eaac23801dfaed3a9 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202481 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t +index 6351ba2..a1b9a85 100755 +--- a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t ++++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t +@@ -3,6 +3,8 @@ + . 
$(dirname $0)/../../volume.rc + cleanup; + ++FILE_COUNT=500 ++ + TEST glusterd + TEST pidof glusterd + TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +@@ -11,15 +13,14 @@ TEST $CLI volume set $V0 cluster.shd-wait-qlength 100 + TEST $CLI volume start $V0 + + TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +-touch $M0/file{1..200} +- ++for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done + TEST kill_brick $V0 $H0 $B0/${V0}1 +-for i in {1..200}; do echo hello>$M0/file$i; done ++for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done + TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +-EXPECT "200" get_pending_heal_count $V0 ++EXPECT "$FILE_COUNT" get_pending_heal_count $V0 + TEST $CLI volume set $V0 self-heal-daemon on + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-- +1.8.3.1 + diff --git a/SOURCES/0417-events-fix-IPv6-memory-corruption.patch b/SOURCES/0417-events-fix-IPv6-memory-corruption.patch new file mode 100644 index 0000000..cefb5bf --- /dev/null +++ b/SOURCES/0417-events-fix-IPv6-memory-corruption.patch @@ -0,0 +1,153 @@ +From 5e231ceb35bb763d6fafc7c3efe1c3c582929cc2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 14 Jan 2020 13:28:47 +0100 +Subject: [PATCH 417/449] events: fix IPv6 memory corruption + +When an event was generated and the target host was resolved to an IPv6 +address, there was a memory overflow when that address was copied to a +fixed IPv4 structure (IPv6 addresses are longer than IPv4 ones). + +This fix correctly handles IPv4 and IPv6 addresses returned by +getaddrinfo() + +Backport of: +> Upstream-patch-link: https://review.gluster.org/24014 +> Change-Id: I5864a0c6e6f1b405bd85988529570140cf23b250 +> Fixes: bz#1790870 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1792873 +Change-Id: I5864a0c6e6f1b405bd85988529570140cf23b250 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202486 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/events.c | 56 +++++++++++++---------------------------------- + 1 file changed, 15 insertions(+), 41 deletions(-) + +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 4e2f8f9..6d1e383 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -34,7 +34,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + int ret = 0; + int sock = -1; + char *eventstr = NULL; +- struct sockaddr_in server; + va_list arguments; + char *msg = NULL; + glusterfs_ctx_t *ctx = NULL; +@@ -42,11 +41,10 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + struct addrinfo hints; + struct addrinfo *result = NULL; + xlator_t *this = THIS; +- int sin_family = AF_INET; + char *volfile_server_transport = NULL; + + /* Global context */ +- ctx = THIS->ctx; ++ ctx = this->ctx; + + if (event < 0 || event >= EVENT_LAST) { + ret = EVENT_ERROR_INVALID_INPUTS; +@@ -60,48 +58,31 @@ _gf_event(eventtypes_t event, const char *fmt, ...) 
+ goto out; + } + +- memset(&hints, 0, sizeof(hints)); +- hints.ai_family = AF_UNSPEC; +- + if (ctx) { + volfile_server_transport = ctx->cmd_args.volfile_server_transport; + } +- + if (!volfile_server_transport) { + volfile_server_transport = "tcp"; + } +- /* Get Host name to send message */ ++ ++ /* host = NULL returns localhost */ ++ host = NULL; + if (ctx && ctx->cmd_args.volfile_server && + (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set + use that information to push the events. */ +- if ((getaddrinfo(ctx->cmd_args.volfile_server, NULL, &hints, +- &result)) != 0) { +- ret = EVENT_ERROR_RESOLVE; +- goto out; +- } +- +- if (get_ip_from_addrinfo(result, &host) == NULL) { +- ret = EVENT_ERROR_RESOLVE; +- goto out; +- } +- +- sin_family = result->ai_family; +- } else { +- /* Localhost, Use the defined IP for localhost */ +- host = gf_strdup(EVENT_HOST); ++ host = ctx->cmd_args.volfile_server; + } + +- /* Socket Configurations */ +- server.sin_family = sin_family; +- server.sin_port = htons(EVENT_PORT); +- ret = inet_pton(server.sin_family, host, &server.sin_addr); +- if (ret <= 0) { +- gf_msg(this->name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, +- "inet_pton failed with return code %d", ret); ++ memset(&hints, 0, sizeof(hints)); ++ hints.ai_family = AF_UNSPEC; ++ hints.ai_socktype = SOCK_DGRAM; ++ hints.ai_flags = AI_ADDRCONFIG; ++ ++ if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) { ++ ret = EVENT_ERROR_RESOLVE; + goto out; + } +- memset(&server.sin_zero, '\0', sizeof(server.sin_zero)); + + va_start(arguments, fmt); + ret = gf_vasprintf(&msg, fmt, arguments); +@@ -113,15 +94,15 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + } + + ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)time(NULL), event, msg); +- ++ GF_FREE(msg); + if (ret <= 0) { + ret = EVENT_ERROR_MSG_FORMAT; + goto out; + } + + /* Send Message */ +- if (sendto(sock, eventstr, strlen(eventstr), 0, (struct sockaddr *)&server, +- sizeof(server)) <= 0) { ++ if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr, ++ result->ai_addrlen) <= 0) { + ret = EVENT_ERROR_SEND; + goto out; + } +@@ -133,17 +114,10 @@ out: + sys_close(sock); + } + +- /* Allocated by gf_vasprintf */ +- if (msg) +- GF_FREE(msg); +- + /* Allocated by gf_asprintf */ + if (eventstr) + GF_FREE(eventstr); + +- if (host) +- GF_FREE(host); +- + if (result) + freeaddrinfo(result); + +-- +1.8.3.1 + diff --git a/SOURCES/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch b/SOURCES/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch new file mode 100644 index 0000000..45622d9 --- /dev/null +++ b/SOURCES/0418-md-cache-avoid-clearing-cache-when-not-necessary.patch @@ -0,0 +1,439 @@ +From 7ad8c03a28fca67150972cda964ebe9233766b54 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Mon, 30 Mar 2020 11:09:39 +0200 +Subject: [PATCH 418/449] md-cache: avoid clearing cache when not necessary + +mdc_inode_xatt_set() blindly cleared current cache when dict was not +NULL, even if there was no xattr requested. + +This patch fixes this by only calling mdc_inode_xatt_set() when we have +explicitly requested something to cache. 
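Conceptually, the request path now records whether any cacheable xattr key was actually added to the outgoing xdata, and each callback consults that flag before touching the cached xattrs. A rough compilable sketch of the guard, using simplified stand-in types for the md-cache internals:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for dict_t: just counts keys. */
struct dict { int nkeys; };

/* Mirrors the new mdc_load_reqs() contract: report whether any
 * cacheable xattr key was actually added to the request. */
static bool load_reqs(struct dict *xdata, const char *configured_xattrs)
{
    if (configured_xattrs == NULL)
        return false;   /* nothing configured, nothing to cache */
    xdata->nkeys++;     /* ...one dict_set() per configured key... */
    return true;
}

static void lookup_cbk(bool update_cache, const struct dict *reply)
{
    /* Old behaviour: any non-NULL reply dict overwrote the cached
     * xattrs. New behaviour: only update when keys were requested. */
    if (update_cache && reply != NULL)
        printf("caching %d xattr keys\n", reply->nkeys);
}

int main(void)
{
    struct dict xdata = {0};
    bool update_cache = load_reqs(&xdata, NULL); /* nothing configured */
    lookup_cbk(update_cache, &xdata);            /* cache left intact */
    return 0;
}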
+ +Backport of: +> Upstream-patch-link: https://review.gluster.org/24267 +> Change-Id: Idc91a4693f1ff39f7059acde26682ccc361b947d +> Fixes: #1140 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1815434 +Change-Id: Idc91a4693f1ff39f7059acde26682ccc361b947d +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202487 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/md-cache/src/md-cache.c | 165 ++++++++++++++++------------ + 1 file changed, 93 insertions(+), 72 deletions(-) + +diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c +index a6b363f..bbbee3b 100644 +--- a/xlators/performance/md-cache/src/md-cache.c ++++ b/xlators/performance/md-cache/src/md-cache.c +@@ -133,6 +133,7 @@ struct mdc_local { + char *key; + dict_t *xattr; + uint64_t incident_time; ++ bool update_cache; + }; + + int +@@ -969,7 +970,7 @@ out: + return ret; + } + +-void ++static bool + mdc_load_reqs(xlator_t *this, dict_t *dict) + { + struct mdc_conf *conf = this->private; +@@ -978,6 +979,7 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) + char *tmp = NULL; + char *tmp1 = NULL; + int ret = 0; ++ bool loaded = false; + + tmp1 = conf->mdc_xattr_str; + if (!tmp1) +@@ -995,13 +997,17 @@ mdc_load_reqs(xlator_t *this, dict_t *dict) + conf->mdc_xattr_str = NULL; + gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE, + "Disabled cache for xattrs, dict_set failed"); ++ goto out; + } + pattern = strtok_r(NULL, ",", &tmp); + } + +- GF_FREE(mdc_xattr_str); ++ loaded = true; ++ + out: +- return; ++ GF_FREE(mdc_xattr_str); ++ ++ return loaded; + } + + struct checkpair { +@@ -1092,6 +1098,25 @@ err: + return ret; + } + ++static dict_t * ++mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata) ++{ ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ if (xdata == NULL) { ++ local->update_cache = false; ++ ++ return NULL; ++ } ++ } else { ++ dict_ref(xdata); ++ } ++ ++ local->update_cache = mdc_load_reqs(this, xdata); ++ ++ return xdata; ++} ++ + int + mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, +@@ -1201,7 +1226,9 @@ mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + + if (local->loc.inode) { + mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time); +- mdc_inode_xatt_set(this, local->loc.inode, dict); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->loc.inode, dict); ++ } + } + out: + MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict, +@@ -1220,7 +1247,6 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + 0, + }; + dict_t *xattr_rsp = NULL; +- dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + struct mdc_conf *conf = this->private; + +@@ -1271,18 +1297,18 @@ mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + return 0; + + uncached: +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + + if (xattr_rsp) + dict_unref(xattr_rsp); +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -1305,7 +1331,9 @@ mdc_stat_cbk(call_frame_t 
*frame, void *cookie, xlator_t *this, int32_t op_ret, + } + + mdc_inode_iatt_set(this, local->loc.inode, buf, local->incident_time); +- mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata); +@@ -1319,7 +1347,6 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + int ret; + struct iatt stbuf; + mdc_local_t *local = NULL; +- dict_t *xattr_alloc = NULL; + struct mdc_conf *conf = this->private; + + local = mdc_local_get(frame, loc->inode); +@@ -1343,17 +1370,16 @@ mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + return 0; + + uncached: +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + GF_ATOMIC_INC(conf->mdc_counter.stat_miss); + STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -1376,7 +1402,9 @@ mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + } + + mdc_inode_iatt_set(this, local->fd->inode, buf, local->incident_time); +- mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata); +@@ -1390,7 +1418,6 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + int ret; + struct iatt stbuf; + mdc_local_t *local = NULL; +- dict_t *xattr_alloc = NULL; + struct mdc_conf *conf = this->private; + + local = mdc_local_get(frame, fd->inode); +@@ -1409,17 +1436,16 @@ mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + return 0; + + uncached: +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + GF_ATOMIC_INC(conf->mdc_counter.stat_miss); + STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -2393,7 +2419,9 @@ mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + +- mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->loc.inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata); +@@ -2410,7 +2438,6 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + mdc_local_t *local = NULL; + dict_t *xattr = NULL; + struct mdc_conf *conf = this->private; +- dict_t *xattr_alloc = NULL; + gf_boolean_t key_satisfied = _gf_true; + + local = mdc_local_get(frame, loc->inode); +@@ -2443,18 +2470,17 @@ mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + + uncached: + if (key_satisfied) { +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + } + + GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); + STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, key, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (key_satisfied && 
(xdata != NULL)) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -2481,7 +2507,9 @@ mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + goto out; + } + +- mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, local->fd->inode, xdata); ++ } + + out: + MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata); +@@ -2498,7 +2526,6 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + dict_t *xattr = NULL; + int op_errno = ENODATA; + struct mdc_conf *conf = this->private; +- dict_t *xattr_alloc = NULL; + gf_boolean_t key_satisfied = _gf_true; + + local = mdc_local_get(frame, fd->inode); +@@ -2531,18 +2558,17 @@ mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + + uncached: + if (key_satisfied) { +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + } + + GF_ATOMIC_INC(conf->mdc_counter.xattr_miss); + STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (key_satisfied && (xdata != NULL)) { ++ dict_unref(xdata); ++ } ++ + return 0; + } + +@@ -2752,27 +2778,22 @@ int + mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) + { +- dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + + local = mdc_local_get(frame, loc->inode); + + loc_copy(&local->loc, loc); + +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- +- if (xdata) { +- /* Tell readdir-ahead to include these keys in xdata when it +- * internally issues readdirp() in it's opendir_cbk */ +- mdc_load_reqs(this, xdata); +- } ++ /* Tell readdir-ahead to include these keys in xdata when it ++ * internally issues readdirp() in it's opendir_cbk */ ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } + + return 0; + } +@@ -2800,7 +2821,9 @@ mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + continue; + mdc_inode_iatt_set(this, entry->inode, &entry->d_stat, + local->incident_time); +- mdc_inode_xatt_set(this, entry->inode, entry->dict); ++ if (local->update_cache) { ++ mdc_inode_xatt_set(this, entry->inode, entry->dict); ++ } + } + + unwind: +@@ -2812,7 +2835,6 @@ int + mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) + { +- dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + + local = mdc_local_get(frame, fd->inode); +@@ -2821,15 +2843,15 @@ mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + + local->fd = fd_ref(fd); + +- if (!xdata) +- xdata = xattr_alloc = dict_new(); +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); +- if (xattr_alloc) +- dict_unref(xattr_alloc); ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } ++ + return 0; + out: + MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); +@@ -2860,7 +2882,6 @@ int + mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) + { +- int need_unref = 0; + 
mdc_local_t *local = NULL; + struct mdc_conf *conf = this->private; + +@@ -2876,19 +2897,14 @@ mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + return 0; + } + +- if (!xdata) { +- xdata = dict_new(); +- need_unref = 1; +- } +- +- if (xdata) +- mdc_load_reqs(this, xdata); ++ xdata = mdc_prepare_request(this, local, xdata); + + STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); + +- if (need_unref && xdata) ++ if (xdata != NULL) { + dict_unref(xdata); ++ } + + return 0; + unwind: +@@ -3468,7 +3484,12 @@ mdc_register_xattr_inval(xlator_t *this) + goto out; + } + +- mdc_load_reqs(this, xattr); ++ if (!mdc_load_reqs(this, xattr)) { ++ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY, ++ "failed to populate cache entries"); ++ ret = -1; ++ goto out; ++ } + + frame = create_frame(this, this->ctx->pool); + if (!frame) { +-- +1.8.3.1 + diff --git a/SOURCES/0419-cluster-afr-fix-race-when-bricks-come-up.patch b/SOURCES/0419-cluster-afr-fix-race-when-bricks-come-up.patch new file mode 100644 index 0000000..ea8c2ea --- /dev/null +++ b/SOURCES/0419-cluster-afr-fix-race-when-bricks-come-up.patch @@ -0,0 +1,104 @@ +From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Sun, 1 Mar 2020 19:49:04 +0100 +Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up + +The was a problem when self-heal was sending lookups at the same time +that one of the bricks was coming up. In this case there was a chance +that the number of 'up' bricks changes in the middle of sending the +requests to subvolumes which caused a discrepancy in the expected +number of replies and the actual number of sent requests. + +This discrepancy caused that AFR continued executing requests before +all requests were complete. Eventually, the frame of the pending +request was destroyed when the operation terminated, causing a use- +after-free issue when the answer was finally received. + +In theory the same thing could happen in the reverse way, i.e. AFR +tries to wait for more replies than sent requests, causing a hang. 
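The fix below makes the fan-out macro work on a stack copy of the up-children list, so the expected-reply count and the set of wound requests are computed from the same snapshot even if a brick changes state mid-iteration. A simplified sketch of the idea:

#include <stdio.h>
#include <string.h>

#define CHILD_COUNT 3

/* May be flipped by another thread while a brick is coming up. */
static unsigned char child_up[CHILD_COUNT] = {1, 0, 1};

static void wind_request(int child) { printf("wind to child %d\n", child); }

/* Returns how many replies the caller must wait for. */
static int fan_out(void)
{
    unsigned char snapshot[CHILD_COUNT];
    int i, expected = 0;

    /* Snapshot first: the reply count and the wind loop must see the
     * same membership, or replies and requests fall out of sync. */
    memcpy(snapshot, child_up, sizeof(snapshot));

    for (i = 0; i < CHILD_COUNT; i++)
        if (snapshot[i])
            expected++;

    for (i = 0; i < CHILD_COUNT; i++)
        if (snapshot[i])
            wind_request(i);

    return expected;
}

int main(void) { return fan_out() == 2 ? 0 : 1; }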
+ +Backport of: +> Upstream-patch-link: https://review.gluster.org/24191 +> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +> Fixes: bz#1808875 + +BUG: 1794663 +Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202489 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++--- + xlators/cluster/afr/src/afr-self-heal-name.c | 4 +++- + xlators/cluster/afr/src/afr-self-heal.h | 7 +++++-- + 3 files changed, 11 insertions(+), 6 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index ce1ea50..d942ccf 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1869,12 +1869,12 @@ int + afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + struct afr_reply *replies) + { +- afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; + +- priv = frame->this->private; ++ local = frame->local; + + return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, +- priv->child_up); ++ local->child_up); + } + + unsigned int +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 7d4f208..dace071 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, + struct afr_reply *replies = NULL; + inode_t *inode = NULL; + int first_idx = -1; ++ afr_local_t *local = NULL; + + priv = this->private; ++ local = frame->local; + + replies = alloca0(sizeof(*replies) * priv->child_count); + + inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies, +- priv->child_up, NULL); ++ local->child_up, NULL); + if (!inode) + return -ENOMEM; + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 8234cec..f7ecf5d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -46,13 +46,16 @@ + afr_local_t *__local = frame->local; \ + afr_private_t *__priv = frame->this->private; \ + int __i = 0; \ +- int __count = AFR_COUNT(list, __priv->child_count); \ ++ int __count = 0; \ ++ unsigned char *__list = alloca(__priv->child_count); \ + \ ++ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \ ++ __count = AFR_COUNT(__list, __priv->child_count); \ + __local->barrier.waitfor = __count; \ + afr_local_replies_wipe(__local, __priv); \ + \ + for (__i = 0; __i < __priv->child_count; __i++) { \ +- if (!list[__i]) \ ++ if (!__list[__i]) \ + continue; \ + STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \ + __priv->children[__i], \ +-- +1.8.3.1 + diff --git a/SOURCES/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch b/SOURCES/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch new file mode 100644 index 0000000..cb27b33 --- /dev/null +++ b/SOURCES/0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch @@ -0,0 +1,46 @@ +From 42a05c7f8464f529f53bced31a64ea373e16f58b Mon Sep 17 00:00:00 2001 +From: Hari Gowtham <hgowtham@redhat.com> +Date: Thu, 24 Oct 2019 17:40:44 +0530 +Subject: [PATCH 420/449] scripts: quota_fsck script TypeError: 
%d format:not + dict + +Problem: One of the prints in the script have been using +%i as the format for printing which doesn't work. + +Fix: use %s as the format in the place of %i + +>Fixes: bz#1764129 +>Change-Id: I4480ede7bf62906ddedbe5f880a1e89c76946641 +>Signed-off-by: Hari Gowtham <hgowtham@redhat.com> +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23586/ + +BUG: 1786681 +Change-Id: I4480ede7bf62906ddedbe5f880a1e89c76946641 +Signed-off-by: hari gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202484 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunny Kumar <sunkumar@redhat.com> +--- + extras/quota/quota_fsck.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py +index 485a37a..174f2a2 100755 +--- a/extras/quota/quota_fsck.py ++++ b/extras/quota/quota_fsck.py +@@ -58,10 +58,10 @@ def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None): + elif log_type == QUOTA_SIZE_MISMATCH: + print("mismatch") + if dir_size is not None: +- print('%24s %60s %12s %12s' % ("Size Mismatch", path, ++ print('%24s %60s %12s %12s' % ("Size Mismatch", path, + xattr_dict, dir_size)) + else: +- print('%-24s %-60s %-12i %-12i' % ("Size Mismatch", path, xattr_dict, ++ print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict, + stbuf.st_size)) + + def size_differs_lot(s1, s2): +-- +1.8.3.1 + diff --git a/SOURCES/0421-Improve-logging-in-EC-client-and-lock-translator.patch b/SOURCES/0421-Improve-logging-in-EC-client-and-lock-translator.patch new file mode 100644 index 0000000..06f0304 --- /dev/null +++ b/SOURCES/0421-Improve-logging-in-EC-client-and-lock-translator.patch @@ -0,0 +1,93 @@ +From 8267e5e97327633bf21fd02df8d52e3a97f0f9ea Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Wed, 4 Dec 2019 17:06:18 +0530 +Subject: [PATCH 421/449] Improve logging in EC, client and lock translator + +BUG: 1787294 +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23814/ +> Change-Id: I98af8672a25ff9fd9dba91a2e1384719f9155255 +> Fixes: bz#1779760 + +Change-Id: I5cb04993f12d6248f2349a0c5a9e2c0ceecaf528 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202533 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-combine.c | 5 +++-- + xlators/cluster/ec/src/ec-common.c | 2 +- + xlators/features/locks/src/inodelk.c | 12 ++++++++---- + 3 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c +index c5af2ab..99e5534 100644 +--- a/xlators/cluster/ec/src/ec-combine.c ++++ b/xlators/cluster/ec/src/ec-combine.c +@@ -179,13 +179,14 @@ ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src, + "links: %u-%u, uid: %u-%u, gid: %u-%u, " + "rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64 + ", " +- "mode: %o-%o)", ++ "mode: %o-%o), %s", + dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink, + src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid, + src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev, + dst[i].ia_size, src[i].ia_size, + st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type), +- st_mode_from_ia(src[i].ia_prot, dst[i].ia_type)); ++ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type), ++ ec_msg_str(fop)); + + return 0; + } +diff --git a/xlators/cluster/ec/src/ec-common.c 
b/xlators/cluster/ec/src/ec-common.c +index 5cae37b..e580bfb 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -2240,7 +2240,7 @@ ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED, +- "entry/inode unlocking failed (%s)", ec_fop_name(link->fop->id)); ++ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop)); + } else { + ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock); + } +diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c +index df00ede..a9c42f1 100644 +--- a/xlators/features/locks/src/inodelk.c ++++ b/xlators/features/locks/src/inodelk.c +@@ -502,22 +502,26 @@ static pl_inode_lock_t * + __inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) + { + pl_inode_lock_t *conf = NULL; ++ inode_t *inode = NULL; ++ ++ inode = lock->pl_inode->inode; + + conf = find_matching_inodelk(lock, dom); + if (!conf) { + gf_log(this->name, GF_LOG_ERROR, + " Matching lock not found for unlock %llu-%llu, by %s " +- "on %p", ++ "on %p for gfid:%s", + (unsigned long long)lock->fl_start, + (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner), +- lock->client); ++ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); + goto out; + } + __delete_inode_lock(conf); + gf_log(this->name, GF_LOG_DEBUG, +- " Matching lock found for unlock %llu-%llu, by %s on %p", ++ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s", + (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end, +- lkowner_utoa(&lock->owner), lock->client); ++ lkowner_utoa(&lock->owner), lock->client, ++ inode ? uuid_utoa(inode->gfid) : "UNKNOWN"); + + out: + return conf; +-- +1.8.3.1 + diff --git a/SOURCES/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch b/SOURCES/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch new file mode 100644 index 0000000..400ba67 --- /dev/null +++ b/SOURCES/0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch @@ -0,0 +1,236 @@ +From 8b11ac1575ef167af2a47a96f7b7ed0f32bb5897 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Fri, 5 Jun 2020 17:20:04 +0530 +Subject: [PATCH 422/449] cluster/afr: Prioritize ENOSPC over other errors + +Backport of: https://review.gluster.org/#/c/glusterfs/+/24477/ + +Problem: +In a replicate/arbiter volume if file creations or writes fails on +quorum number of bricks and on one brick it is due to ENOSPC and +on other brick it fails for a different reason, it may fail with +errors other than ENOSPC in some cases. + +Fix: +Prioritize ENOSPC over other lesser priority errors and do not set +op_errno in posix_gfid_set if op_ret is 0 to avoid receiving any +error_no which can be misinterpreted by __afr_dir_write_finalize(). + +Also removing the function afr_has_arbiter_fop_cbk_quorum() which +might consider a successful reply form a single brick as quorum +success in some cases, whereas we always need fop to be successful +on quorum number of bricks in arbiter configuration. 
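The errno part of the fix simply inserts ENOSPC into the precedence used when collapsing per-brick failures, so a disk-full condition is never masked by a lower-priority error from another brick. A standalone sketch of that precedence logic, mirroring afr_higher_errno() in the hunk below:

#include <errno.h>
#include <stdio.h>

/* Collapse per-brick errors into one, preferring the more meaningful
 * errno: ENODATA > ENOENT > ESTALE > ENOSPC > everything else. */
static int higher_errno(int old_errno, int new_errno)
{
    static const int ranked[] = {ENODATA, ENOENT, ESTALE, ENOSPC};
    size_t i;

    for (i = 0; i < sizeof(ranked) / sizeof(ranked[0]); i++)
        if (old_errno == ranked[i] || new_errno == ranked[i])
            return ranked[i];

    return new_errno;
}

int main(void)
{
    /* One brick full, another failing differently: report ENOSPC. */
    printf("%d\n", higher_errno(EIO, ENOSPC) == ENOSPC);
    return 0;
}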
+ +Change-Id: I4dd2bff17e6812bc7c8372130976e365e2407d88 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +BUG: 1837467 +Reviewed-on: https://code.engineering.redhat.com/gerrit/202526 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bugs/replicate/issue-1254-prioritize-enospc.t | 80 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-common.c | 4 +- + xlators/cluster/afr/src/afr-transaction.c | 48 +------------ + xlators/storage/posix/src/posix-helpers.c | 2 +- + 4 files changed, 86 insertions(+), 48 deletions(-) + create mode 100644 tests/bugs/replicate/issue-1254-prioritize-enospc.t + +diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +new file mode 100644 +index 0000000..fab94b7 +--- /dev/null ++++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t +@@ -0,0 +1,80 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup ++ ++function create_bricks { ++ TEST truncate -s 100M $B0/brick0 ++ TEST truncate -s 100M $B0/brick1 ++ TEST truncate -s 20M $B0/brick2 ++ LO1=`SETUP_LOOP $B0/brick0` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO1 ++ LO2=`SETUP_LOOP $B0/brick1` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO2 ++ LO3=`SETUP_LOOP $B0/brick2` ++ TEST [ $? -eq 0 ] ++ TEST MKFS_LOOP $LO3 ++ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2 ++ TEST MOUNT_LOOP $LO1 $B0/${V0}0 ++ TEST MOUNT_LOOP $LO2 $B0/${V0}1 ++ TEST MOUNT_LOOP $LO3 $B0/${V0}2 ++} ++ ++function create_files { ++ local i=1 ++ while (true) ++ do ++ touch $M0/file$i ++ if [ -e $B0/${V0}2/file$i ]; ++ then ++ ((i++)) ++ else ++ break ++ fi ++ done ++} ++ ++TESTS_EXPECTED_IN_LOOP=13 ++ ++#Arbiter volume: Check for ENOSPC when arbiter brick becomes full# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((echo "Test" > $M0/file-3) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup ++ ++#Replica-3 volume: Check for ENOSPC when one of the brick becomes full# ++#Keeping the third brick of lower size to simulate disk full scenario# ++TEST glusterd ++create_bricks ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 ++ ++create_files ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++error1=$(touch $M0/file-1 2>&1) ++EXPECT "No space left on device" echo $error1 ++error2=$(mkdir $M0/dir-1 2>&1) ++EXPECT "No space left on device" echo $error2 ++error3=$((cat /dev/zero > $M0/file1) 2>&1) ++EXPECT "No space left on device" echo $error3 ++ ++cleanup +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 5806556..59710aa 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2464,7 +2464,7 @@ error: + * others in that they must be given higher priority while + * returning to the user. 
+ * +- * The hierarchy is ENODATA > ENOENT > ESTALE > others ++ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others + */ + + int +@@ -2476,6 +2476,8 @@ afr_higher_errno(int32_t old_errno, int32_t new_errno) + return ENOENT; + if (old_errno == ESTALE || new_errno == ESTALE) + return ESTALE; ++ if (old_errno == ENOSPC || new_errno == ENOSPC) ++ return ENOSPC; + + return new_errno; + } +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 15f3a7e..8e65ae2 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -514,42 +514,6 @@ afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this) + local->transaction.pre_op_sources[j] = 0; + } + +-gf_boolean_t +-afr_has_arbiter_fop_cbk_quorum(call_frame_t *frame) +-{ +- afr_local_t *local = NULL; +- afr_private_t *priv = NULL; +- xlator_t *this = NULL; +- gf_boolean_t fop_failed = _gf_false; +- unsigned char *pre_op_sources = NULL; +- int i = 0; +- +- local = frame->local; +- this = frame->this; +- priv = this->private; +- pre_op_sources = local->transaction.pre_op_sources; +- +- /* If the fop failed on the brick, it is not a source. */ +- for (i = 0; i < priv->child_count; i++) +- if (local->transaction.failed_subvols[i]) +- pre_op_sources[i] = 0; +- +- switch (AFR_COUNT(pre_op_sources, priv->child_count)) { +- case 1: +- if (pre_op_sources[ARBITER_BRICK_INDEX]) +- fop_failed = _gf_true; +- break; +- case 0: +- fop_failed = _gf_true; +- break; +- } +- +- if (fop_failed) +- return _gf_false; +- +- return _gf_true; +-} +- + void + afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this) + { +@@ -968,12 +932,8 @@ afr_need_dirty_marking(call_frame_t *frame, xlator_t *this) + priv->child_count) + return _gf_false; + +- if (priv->arbiter_count) { +- if (!afr_has_arbiter_fop_cbk_quorum(frame)) +- need_dirty = _gf_true; +- } else if (!afr_has_fop_cbk_quorum(frame)) { ++ if (!afr_has_fop_cbk_quorum(frame)) + need_dirty = _gf_true; +- } + + return need_dirty; + } +@@ -1023,12 +983,8 @@ afr_handle_quorum(call_frame_t *frame, xlator_t *this) + * no split-brain with the fix. The problem is eliminated completely. + */ + +- if (priv->arbiter_count) { +- if (afr_has_arbiter_fop_cbk_quorum(frame)) +- return; +- } else if (afr_has_fop_cbk_quorum(frame)) { ++ if (afr_has_fop_cbk_quorum(frame)) + return; +- } + + if (afr_need_dirty_marking(frame, this)) + goto set_response; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 2c27d22..949c799 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1059,7 +1059,7 @@ verify_handle: + ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); + + out: +- if (!(*op_errno)) ++ if (ret && !(*op_errno)) + *op_errno = errno; + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch b/SOURCES/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch new file mode 100644 index 0000000..6a547ea --- /dev/null +++ b/SOURCES/0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch @@ -0,0 +1,128 @@ +From c140d30382306d08eaf2bc5c53e5be26d3e381e1 Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 18 Nov 2019 05:24:33 -0500 +Subject: [PATCH 423/449] ctime: Fix ctime inconsisteny with utimensat + +Problem: +When touch is used to create a file, the ctime is not matching +atime and mtime which ideally should match. 
There is a difference +in nano seconds. + +Cause: +When touch is used modify atime or mtime to current time (UTIME_NOW), +the current time is taken from kernel. The ctime gets updated to current +time when atime or mtime is updated. But the current time to update +ctime is taken from utime xlator. Hence the difference in nano seconds. + +Fix: +When utimesat uses UTIME_NOW, use the current time from kernel. + +>fixes: bz#1773530 +>Change-Id: I9ccfa47dcd39df23396852b4216f1773c49250ce +>Signed-off-by: Kotresh HR <khiremat@redhat.com> + +backport of: https://review.gluster.org/#/c/glusterfs/+/23719/ +BUG: 1761932 +Change-Id: I9ccfa47dcd39df23396852b4216f1773c49250ce +Signed-off-by: Kotresh HR <khiremat@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202541 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 2 ++ + tests/basic/ctime/ctime-utimesat.t | 28 ++++++++++++++++++++++++++ + xlators/features/utime/src/utime-gen-fops-c.py | 10 +++++++++ + xlators/mount/fuse/src/fuse-bridge.c | 8 ++++++++ + 4 files changed, 48 insertions(+) + create mode 100644 tests/basic/ctime/ctime-utimesat.t + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index da551e9..db04c4d 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -35,6 +35,8 @@ + #define GF_SET_ATTR_ATIME 0x10 + #define GF_SET_ATTR_MTIME 0x20 + #define GF_SET_ATTR_CTIME 0x40 ++#define GF_ATTR_ATIME_NOW 0x80 ++#define GF_ATTR_MTIME_NOW 0x100 + + #define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE) + #define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID) +diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t +new file mode 100644 +index 0000000..540e57a +--- /dev/null ++++ b/tests/basic/ctime/ctime-utimesat.t +@@ -0,0 +1,28 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.read-after-open off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.io-cache off ++ ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++touch $M0/FILE ++ ++atime=$(stat -c "%.X" $M0/FILE) ++EXPECT $atime stat -c "%.Y" $M0/FILE ++EXPECT $atime stat -c "%.Z" $M0/FILE ++ ++cleanup +diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py +index 8730a51..9fb3e1b 100755 +--- a/xlators/features/utime/src/utime-gen-fops-c.py ++++ b/xlators/features/utime/src/utime-gen-fops-c.py +@@ -95,6 +95,16 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + frame->root->flags |= MDATA_CTIME; + } + ++ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { ++ if (valid & GF_ATTR_ATIME_NOW) { ++ frame->root->ctime.tv_sec = stbuf->ia_atime; ++ frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec; ++ } else if (valid & GF_ATTR_MTIME_NOW) { ++ frame->root->ctime.tv_sec = stbuf->ia_mtime; ++ frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec; ++ } ++ } ++ + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 6e99053..fdeec49 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -1706,6 +1706,14 @@ fattr_to_gf_set_attr(int32_t valid) + gf_valid |= GF_SET_ATTR_CTIME; + #endif + ++#if FUSE_KERNEL_MINOR_VERSION >= 9 ++ if (valid & FATTR_ATIME_NOW) ++ gf_valid |= GF_ATTR_ATIME_NOW; ++ ++ if (valid & FATTR_MTIME_NOW) ++ gf_valid |= GF_ATTR_MTIME_NOW; ++#endif ++ + if (valid & FATTR_SIZE) + gf_valid |= GF_SET_ATTR_SIZE; + +-- +1.8.3.1 + diff --git a/SOURCES/0424-afr-make-heal-info-lockless.patch b/SOURCES/0424-afr-make-heal-info-lockless.patch new file mode 100644 index 0000000..593fa34 --- /dev/null +++ b/SOURCES/0424-afr-make-heal-info-lockless.patch @@ -0,0 +1,884 @@ +From 54d4ea44fec96560aad9c41f7e4f5aad164ffb8b Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Fri, 5 Jun 2020 14:14:15 +0530 +Subject: [PATCH 424/449] afr: make heal info lockless + +Changes in locks xlator: +Added support for per-domain inodelk count requests. +Caller needs to set GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS key in the +dict and then set each key with name +'GLUSTERFS_INODELK_DOM_PREFIX:<domain name>'. +In the response dict, the xlator will send the per domain count as +values for each of these keys. + +Changes in AFR: +Replaced afr_selfheal_locked_inspect() with afr_lockless_inspect(). Logic has +been added to make the latter behave same as the former, thus not +breaking the current heal info output behaviour. 
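With this in place, heal info can learn the per-domain inodelk counts through a single lockless getxattr instead of taking locks itself. A small sketch of how a caller composes the request keys (the macros match the glusterfs.h additions below; the domain name is only an example):

#include <stdio.h>

/* Key names introduced in glusterfs.h by this patch. */
#define MULTI_DOM_REQ "glusterfs.multi-dom-lk-cnt-req"
#define DOM_PREFIX    "glusterfs.inodelk-dom-prefix"

/* Build the per-domain key a caller sets in the request dict, i.e.
 * "glusterfs.inodelk-dom-prefix:<domain>". */
static int make_dom_key(char *buf, size_t len, const char *domain)
{
    int n = snprintf(buf, len, "%s:%s", DOM_PREFIX, domain);
    return (n < 0 || (size_t)n >= len) ? -1 : 0;
}

int main(void)
{
    char key[256];

    /* The caller first flags the request... */
    printf("set %s = 1\n", MULTI_DOM_REQ);

    /* ...then adds one key per lock domain to be counted; the locks
     * xlator fills in the reply with the count for each such key. */
    if (make_dom_key(key, sizeof(key), "myvol-replicate-0") == 0)
        printf("set %s\n", key);

    return 0;
}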
+ +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23771/ +> fixes: bz#1774011 +> Change-Id: Ie9e83c162aa77f44a39c2ba7115de558120ada4d + +BUG: 1721355 +Change-Id: I8ed4b504880b19e00068312efd90cd0706787404 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202490 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + heal/src/glfs-heal.c | 17 +- + libglusterfs/src/glusterfs/glusterfs.h | 2 + + xlators/cluster/afr/src/afr-common.c | 367 +++++++++++-------------- + xlators/cluster/afr/src/afr-self-heal-common.c | 43 ++- + xlators/cluster/afr/src/afr-self-heal.h | 3 +- + xlators/features/locks/src/common.h | 4 + + xlators/features/locks/src/locks.h | 8 + + xlators/features/locks/src/posix.c | 117 +++++++- + 8 files changed, 338 insertions(+), 223 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 125b12c..5af9e31 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -775,7 +775,8 @@ static int + glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + uint64_t *offset, num_entries_t *num_entries, + print_status glfsh_print_status, +- gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode) ++ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode, ++ dict_t *xattr_req) + { + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; +@@ -807,7 +808,7 @@ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(loc.gfid, gfid); +- ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL); ++ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, xattr_req, NULL); + if (ret) { + if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN)) + goto out; +@@ -876,19 +877,19 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + if (heal_op == GF_SHD_OP_INDEX_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_heal_status, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_spb_status, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_summary_status, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) { +@@ -897,7 +898,7 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_heal_status_boolean, +- ignore, mode); ++ ignore, mode, xattr_req); + if (ret < 0) + goto out; + } +@@ -951,6 +952,10 @@ glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + int32_t op_errno = 0; + gf_boolean_t ignore = _gf_false; + ++ ret = dict_set_str(xattr_req, "index-vgfid", vgfid); ++ if (ret) ++ return ret; ++ + if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID)) + ignore = _gf_true; + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 3b594c0..177a020 100644 +--- 
a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -217,6 +217,8 @@ enum gf_internal_fop_indicator { + #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count" + #define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk" + #define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count" ++#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix" ++#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req" + #define GFID_TO_PATH_KEY "glusterfs.gfid2path" + #define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime" + #define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime" +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 59710aa..c355ec5 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5908,259 +5908,218 @@ out: + return _gf_true; + } + +-int +-afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this, +- inode_t *inode, gf_boolean_t *msh, +- unsigned char *pending) ++static dict_t * ++afr_set_heal_info(char *status) + { ++ dict_t *dict = NULL; + int ret = -1; +- unsigned char *locked_on = NULL; +- unsigned char *sources = NULL; +- unsigned char *sinks = NULL; +- unsigned char *healed_sinks = NULL; +- unsigned char *undid_pending = NULL; +- struct afr_reply *locked_replies = NULL; +- +- afr_private_t *priv = this->private; + +- locked_on = alloca0(priv->child_count); +- sources = alloca0(priv->child_count); +- sinks = alloca0(priv->child_count); +- healed_sinks = alloca0(priv->child_count); +- undid_pending = alloca0(priv->child_count); ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto out; ++ } + +- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); ++ ret = dict_set_dynstr_sizen(dict, "heal-info", status); ++ if (ret) ++ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, ++ "Failed to set heal-info key to " ++ "%s", ++ status); ++out: ++ /* Any error other than EINVAL, dict_set_dynstr frees status */ ++ if (ret == -ENOMEM || ret == -EINVAL) { ++ GF_FREE(status); ++ } + +- ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, +- locked_on); +- { +- if (ret == 0) { +- /* Not a single lock */ +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; /* all invalid responses */ +- goto out; +- } +- ret = __afr_selfheal_metadata_prepare( +- frame, this, inode, locked_on, sources, sinks, healed_sinks, +- undid_pending, locked_replies, pending); +- *msh = afr_decide_heal_info(priv, sources, ret); ++ if (ret && dict) { ++ dict_unref(dict); ++ dict = NULL; + } +- afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0, +- locked_on); +-out: +- if (locked_replies) +- afr_replies_wipe(locked_replies, priv->child_count); +- return ret; ++ return dict; + } + +-int +-afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd, +- gf_boolean_t *dsh, unsigned char *pflag) ++static gf_boolean_t ++afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies, ++ afr_transaction_type type) + { +- int ret = -1; +- unsigned char *data_lock = NULL; +- unsigned char *sources = NULL; +- unsigned char *sinks = NULL; +- unsigned char *healed_sinks = NULL; +- unsigned char *undid_pending = NULL; +- afr_private_t *priv = NULL; +- struct afr_reply *locked_replies = NULL; +- inode_t *inode = fd->inode; ++ afr_private_t *priv = this->private; ++ int *dirty = alloca0(priv->child_count * 
sizeof(int)); ++ int i = 0; + +- priv = this->private; +- data_lock = alloca0(priv->child_count); +- sources = alloca0(priv->child_count); +- sinks = alloca0(priv->child_count); +- healed_sinks = alloca0(priv->child_count); +- undid_pending = alloca0(priv->child_count); ++ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (dirty[i] > 1) ++ return _gf_true; ++ } + +- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); ++ return _gf_false; ++} + +- ret = afr_selfheal_inodelk(frame, this, inode, this->name, 0, 0, data_lock); +- { +- if (ret == 0) { +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; /* all invalid responses */ +- goto out; +- } +- ret = __afr_selfheal_data_prepare(frame, this, inode, data_lock, +- sources, sinks, healed_sinks, +- undid_pending, locked_replies, pflag); +- *dsh = afr_decide_heal_info(priv, sources, ret); ++static gf_boolean_t ++afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies, ++ ia_type_t ia_type) ++{ ++ gf_boolean_t data_chk = _gf_false; ++ gf_boolean_t mdata_chk = _gf_false; ++ gf_boolean_t entry_chk = _gf_false; ++ ++ switch (ia_type) { ++ case IA_IFDIR: ++ mdata_chk = _gf_true; ++ entry_chk = _gf_true; ++ break; ++ case IA_IFREG: ++ mdata_chk = _gf_true; ++ data_chk = _gf_true; ++ break; ++ default: ++ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/ ++ mdata_chk = _gf_true; ++ break; + } +- afr_selfheal_uninodelk(frame, this, inode, this->name, 0, 0, data_lock); +-out: +- if (locked_replies) +- afr_replies_wipe(locked_replies, priv->child_count); +- return ret; ++ ++ if (data_chk && afr_is_dirty_count_non_unary_for_txn( ++ this, replies, AFR_DATA_TRANSACTION)) { ++ return _gf_true; ++ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn( ++ this, replies, AFR_METADATA_TRANSACTION)) { ++ return _gf_true; ++ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn( ++ this, replies, AFR_ENTRY_TRANSACTION)) { ++ return _gf_true; ++ } ++ ++ return _gf_false; + } + +-int +-afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this, +- inode_t *inode, gf_boolean_t *esh, +- unsigned char *pflag) ++static int ++afr_update_heal_status(xlator_t *this, struct afr_reply *replies, ++ char *index_vgfid, ia_type_t ia_type, gf_boolean_t *esh, ++ gf_boolean_t *dsh, gf_boolean_t *msh) + { + int ret = -1; +- int source = -1; ++ GF_UNUSED int ret1 = 0; ++ int i = 0; ++ int io_domain_lk_count = 0; ++ int shd_domain_lk_count = 0; + afr_private_t *priv = NULL; +- unsigned char *locked_on = NULL; +- unsigned char *data_lock = NULL; +- unsigned char *sources = NULL; +- unsigned char *sinks = NULL; +- unsigned char *healed_sinks = NULL; +- struct afr_reply *locked_replies = NULL; +- gf_boolean_t granular_locks = _gf_false; ++ char *key1 = NULL; ++ char *key2 = NULL; + + priv = this->private; +- granular_locks = priv->granular_locks; /*Assign to local variable so that +- reconfigure doesn't change this +- value between locking and unlocking +- below*/ +- locked_on = alloca0(priv->child_count); +- data_lock = alloca0(priv->child_count); +- sources = alloca0(priv->child_count); +- sinks = alloca0(priv->child_count); +- healed_sinks = alloca0(priv->child_count); +- +- locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count); ++ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(this->name)); ++ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(priv->sh_domain)); ++ 
sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name); ++ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain); + +- if (!granular_locks) { +- ret = afr_selfheal_tryentrylk(frame, this, inode, priv->sh_domain, NULL, +- locked_on); +- } +- { +- if (!granular_locks && ret == 0) { +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; /* all invalid responses */ +- goto out; ++ for (i = 0; i < priv->child_count; i++) { ++ if ((replies[i].valid != 1) || (replies[i].op_ret != 0)) ++ continue; ++ if (!io_domain_lk_count) { ++ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count); + } ++ if (!shd_domain_lk_count) { ++ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count); ++ } ++ } + +- ret = afr_selfheal_entrylk(frame, this, inode, this->name, NULL, +- data_lock); +- { +- if (ret == 0) { +- ret = -afr_final_errno(frame->local, priv); +- if (ret == 0) +- ret = -ENOTCONN; +- /* all invalid responses */ +- goto unlock; +- } +- ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock, +- sources, sinks, healed_sinks, +- locked_replies, &source, pflag); +- if ((ret == 0) && (*pflag & PFLAG_SBRAIN)) +- ret = -EIO; +- *esh = afr_decide_heal_info(priv, sources, ret); ++ if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) { ++ if (shd_domain_lk_count) { ++ ret = -EAGAIN; /*For 'possibly-healing'. */ ++ } else { ++ ret = 0; /*needs heal. Just set a non -ve value so that it is ++ assumed as the source index.*/ ++ } ++ } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) { ++ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) || ++ (!io_domain_lk_count)) { ++ /* Needs heal. */ ++ ret = 0; ++ } else { ++ /* No heal needed. */ ++ *dsh = *esh = *msh = 0; + } +- afr_selfheal_unentrylk(frame, this, inode, this->name, NULL, data_lock, +- NULL); + } +-unlock: +- if (!granular_locks) +- afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, +- locked_on, NULL); +-out: +- if (locked_replies) +- afr_replies_wipe(locked_replies, priv->child_count); + return ret; + } + ++/*return EIO, EAGAIN or pending*/ + int +-afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, +- inode_t **inode, gf_boolean_t *entry_selfheal, +- gf_boolean_t *data_selfheal, +- gf_boolean_t *metadata_selfheal, +- unsigned char *pending) +- ++afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, ++ inode_t **inode, char *index_vgfid, ++ gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, ++ gf_boolean_t *metadata_selfheal, unsigned char *pending) + { + int ret = -1; +- fd_t *fd = NULL; ++ int i = 0; ++ afr_private_t *priv = NULL; ++ struct afr_reply *replies = NULL; + gf_boolean_t dsh = _gf_false; + gf_boolean_t msh = _gf_false; + gf_boolean_t esh = _gf_false; ++ unsigned char *sources = NULL; ++ unsigned char *sinks = NULL; ++ unsigned char *valid_on = NULL; ++ uint64_t *witness = NULL; ++ ++ priv = this->private; ++ replies = alloca0(sizeof(*replies) * priv->child_count); ++ sources = alloca0(sizeof(*sources) * priv->child_count); ++ sinks = alloca0(sizeof(*sinks) * priv->child_count); ++ witness = alloca0(sizeof(*witness) * priv->child_count); ++ valid_on = alloca0(sizeof(*valid_on) * priv->child_count); + + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh, +- &esh); ++ &esh, replies); + if (ret) + goto out; +- +- /* For every heal type hold locks and check if it indeed needs heal */ +- +- /* Heal-info does an open() on the file being examined so that the +- * 
current eager-lock holding client, if present, at some point sees +- * open-fd count being > 1 and releases the eager-lock so that heal-info +- * doesn't remain blocked forever until IO completes. +- */ +- if ((*inode)->ia_type == IA_IFREG) { +- ret = afr_selfheal_data_open(this, *inode, &fd); +- if (ret < 0) { +- gf_msg_debug(this->name, -ret, "%s: Failed to open", +- uuid_utoa((*inode)->gfid)); +- goto out; ++ for (i = 0; i < priv->child_count; i++) { ++ if (replies[i].valid && replies[i].op_ret == 0) { ++ valid_on[i] = 1; + } + } +- + if (msh) { +- ret = afr_selfheal_locked_metadata_inspect(frame, this, *inode, &msh, +- pending); +- if (ret == -EIO) ++ ret = afr_selfheal_find_direction(frame, this, replies, ++ AFR_METADATA_TRANSACTION, valid_on, ++ sources, sinks, witness, pending); ++ if (*pending & PFLAG_SBRAIN) ++ ret = -EIO; ++ if (ret) + goto out; + } +- + if (dsh) { +- ret = afr_selfheal_locked_data_inspect(frame, this, fd, &dsh, pending); +- if (ret == -EIO || (ret == -EAGAIN)) ++ ret = afr_selfheal_find_direction(frame, this, replies, ++ AFR_DATA_TRANSACTION, valid_on, ++ sources, sinks, witness, pending); ++ if (*pending & PFLAG_SBRAIN) ++ ret = -EIO; ++ if (ret) + goto out; + } +- + if (esh) { +- ret = afr_selfheal_locked_entry_inspect(frame, this, *inode, &esh, +- pending); ++ ret = afr_selfheal_find_direction(frame, this, replies, ++ AFR_ENTRY_TRANSACTION, valid_on, ++ sources, sinks, witness, pending); ++ if (*pending & PFLAG_SBRAIN) ++ ret = -EIO; ++ if (ret) ++ goto out; + } + ++ ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type, ++ &esh, &dsh, &msh); + out: + *data_selfheal = dsh; + *entry_selfheal = esh; + *metadata_selfheal = msh; +- if (fd) +- fd_unref(fd); ++ if (replies) ++ afr_replies_wipe(replies, priv->child_count); + return ret; + } + +-static dict_t * +-afr_set_heal_info(char *status) +-{ +- dict_t *dict = NULL; +- int ret = -1; +- +- dict = dict_new(); +- if (!dict) { +- ret = -ENOMEM; +- goto out; +- } +- +- ret = dict_set_dynstr_sizen(dict, "heal-info", status); +- if (ret) +- gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED, +- "Failed to set heal-info key to " +- "%s", +- status); +-out: +- /* Any error other than EINVAL, dict_set_dynstr frees status */ +- if (ret == -ENOMEM || ret == -EINVAL) { +- GF_FREE(status); +- } +- +- if (ret && dict) { +- dict_unref(dict); +- dict = NULL; +- } +- return dict; +-} +- + int + afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + { +@@ -6174,10 +6133,18 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + inode_t *inode = NULL; + char *substr = NULL; + char *status = NULL; ++ afr_local_t *local = NULL; ++ char *index_vgfid = NULL; ++ ++ local = frame->local; ++ if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) { ++ ret = -1; ++ goto out; ++ } + +- ret = afr_selfheal_locked_inspect(frame, this, loc->gfid, &inode, +- &entry_selfheal, &data_selfheal, +- &metadata_selfheal, &pending); ++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, index_vgfid, ++ &entry_selfheal, &data_selfheal, ++ &metadata_selfheal, &pending); + + if (ret == -ENOMEM) { + ret = -1; +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index d942ccf..1608f75 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1827,6 +1827,37 @@ afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, + return inode; + } + ++static int 
++afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict) ++{ ++ int ret = 0; ++ afr_private_t *priv = NULL; ++ char *key1 = NULL; ++ char *key2 = NULL; ++ ++ priv = this->private; ++ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(this->name)); ++ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 + ++ strlen(priv->sh_domain)); ++ ++ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1); ++ if (ret) ++ return ret; ++ ++ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name); ++ ret = dict_set_uint32(dict, key1, 1); ++ if (ret) ++ return ret; ++ ++ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain); ++ ret = dict_set_uint32(dict, key2, 1); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +@@ -1851,6 +1882,11 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + return -ENOMEM; + } + ++ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) { ++ dict_unref(xattr_req); ++ return -1; ++ } ++ + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, gfid); + +@@ -2241,7 +2277,8 @@ int + afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **link_inode, gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, +- gf_boolean_t *entry_selfheal) ++ gf_boolean_t *entry_selfheal, ++ struct afr_reply *replies_dst) + { + afr_private_t *priv = NULL; + inode_t *inode = NULL; +@@ -2377,6 +2414,8 @@ afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + + ret = 0; + out: ++ if (replies && replies_dst) ++ afr_replies_copy(replies_dst, replies, priv->child_count); + if (inode) + inode_unref(inode); + if (replies) +@@ -2493,7 +2532,7 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) + + ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode, + &data_selfheal, &metadata_selfheal, +- &entry_selfheal); ++ &entry_selfheal, NULL); + if (ret) + goto out; + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index f7ecf5d..b39af02 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -327,7 +327,8 @@ int + afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + inode_t **link_inode, gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, +- gf_boolean_t *entry_selfheal); ++ gf_boolean_t *entry_selfheal, ++ struct afr_reply *replies); + + int + afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid); +diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h +index 3a74967..ea86b96 100644 +--- a/xlators/features/locks/src/common.h ++++ b/xlators/features/locks/src/common.h +@@ -45,6 +45,10 @@ + fd_unref(__local->fd); \ + if (__local->inode) \ + inode_unref(__local->inode); \ ++ if (__local->xdata) { \ ++ dict_unref(__local->xdata); \ ++ __local->xdata = NULL; \ ++ } \ + mem_put(__local); \ + } \ + } while (0) +diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h +index b817960..aa267de 100644 +--- a/xlators/features/locks/src/locks.h ++++ b/xlators/features/locks/src/locks.h +@@ -239,6 +239,7 @@ typedef struct { + gf_boolean_t inodelk_count_req; + gf_boolean_t posixlk_count_req; + gf_boolean_t parent_entrylk_req; ++ gf_boolean_t multiple_dom_lk_requests; + int update_mlock_enforced_flag; + } 
pl_local_t; + +@@ -260,6 +261,13 @@ typedef struct _locks_ctx { + struct list_head metalk_list; + } pl_ctx_t; + ++typedef struct _multi_dom_lk_data { ++ xlator_t *this; ++ inode_t *inode; ++ dict_t *xdata_rsp; ++ gf_boolean_t keep_max; ++} multi_dom_lk_data; ++ + typedef enum { DECREMENT, INCREMENT } pl_count_op_t; + + pl_ctx_t * +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 4592240..9a14c64 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -150,13 +150,20 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + gf_boolean_t + pl_has_xdata_requests(dict_t *xdata) + { +- static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, +- GLUSTERFS_INODELK_DOM_COUNT, GLUSTERFS_POSIXLK_COUNT, +- GLUSTERFS_PARENT_ENTRYLK, NULL}; +- static int reqs_size[] = { +- SLEN(GLUSTERFS_ENTRYLK_COUNT), SLEN(GLUSTERFS_INODELK_COUNT), +- SLEN(GLUSTERFS_INODELK_DOM_COUNT), SLEN(GLUSTERFS_POSIXLK_COUNT), +- SLEN(GLUSTERFS_PARENT_ENTRYLK), 0}; ++ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT, ++ GLUSTERFS_INODELK_COUNT, ++ GLUSTERFS_INODELK_DOM_COUNT, ++ GLUSTERFS_POSIXLK_COUNT, ++ GLUSTERFS_PARENT_ENTRYLK, ++ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, ++ NULL}; ++ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT), ++ SLEN(GLUSTERFS_INODELK_COUNT), ++ SLEN(GLUSTERFS_INODELK_DOM_COUNT), ++ SLEN(GLUSTERFS_POSIXLK_COUNT), ++ SLEN(GLUSTERFS_PARENT_ENTRYLK), ++ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS), ++ 0}; + int i = 0; + + if (!xdata) +@@ -169,12 +176,22 @@ pl_has_xdata_requests(dict_t *xdata) + return _gf_false; + } + ++static int ++dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data) ++{ ++ dict_del(dict, key); ++ return 0; ++} ++ + void + pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) + { + if (!local || !xdata) + return; + ++ GF_ASSERT(local->xdata == NULL); ++ local->xdata = dict_copy_with_ref(xdata, NULL); ++ + if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) { + local->entrylk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT); +@@ -183,6 +200,12 @@ pl_get_xdata_requests(pl_local_t *local, dict_t *xdata) + local->inodelk_count_req = 1; + dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT); + } ++ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) { ++ local->multiple_dom_lk_requests = 1; ++ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS); ++ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", ++ dict_delete_domain_key, NULL); ++ } + + local->inodelk_dom_count_req = dict_get_sizen(xdata, + GLUSTERFS_INODELK_DOM_COUNT); +@@ -210,7 +233,7 @@ pl_needs_xdata_response(pl_local_t *local) + + if (local->parent_entrylk_req || local->entrylk_count_req || + local->inodelk_dom_count_req || local->inodelk_count_req || +- local->posixlk_count_req) ++ local->posixlk_count_req || local->multiple_dom_lk_requests) + return _gf_true; + + return _gf_false; +@@ -411,6 +434,75 @@ pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict, + } + + void ++pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict, ++ char *domname, gf_boolean_t keep_max, char *key) ++{ ++ int32_t count = 0; ++ int32_t maxcount = -1; ++ int ret = -1; ++ ++ if (keep_max) { ++ ret = dict_get_int32(dict, key, &maxcount); ++ if (ret < 0) ++ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s", ++ GLUSTERFS_INODELK_COUNT); ++ } ++ count = get_inodelk_count(this, inode, domname); ++ if (maxcount >= count) ++ return; ++ ++ 
ret = dict_set_int32(dict, key, count); ++ if (ret < 0) { ++ gf_msg_debug(this->name, 0, ++ "Failed to set count for " ++ "key %s", ++ key); ++ } ++ ++ return; ++} ++ ++static int ++pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value, ++ void *data) ++{ ++ multi_dom_lk_data *d = data; ++ char *tmp_key = NULL; ++ char *save_ptr = NULL; ++ ++ tmp_key = gf_strdup(key); ++ strtok_r(tmp_key, ":", &save_ptr); ++ if (!*save_ptr) { ++ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL, ++ "Could not tokenize domain string from key %s", key); ++ return -1; ++ } ++ ++ pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr, ++ d->keep_max, key); ++ if (tmp_key) ++ GF_FREE(tmp_key); ++ ++ return 0; ++} ++ ++void ++pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local, ++ inode_t *inode, dict_t *dict, ++ gf_boolean_t keep_max) ++{ ++ multi_dom_lk_data data; ++ ++ data.this = this; ++ data.inode = inode; ++ data.xdata_rsp = dict; ++ data.keep_max = keep_max; ++ ++ dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*", ++ pl_inodelk_xattr_fill_multiple, &data); ++} ++ ++void + pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, + inode_t *inode, char *name, dict_t *xdata, + gf_boolean_t max_lock) +@@ -437,6 +529,9 @@ pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent, + + if (local->posixlk_count_req) + pl_posixlk_xattr_fill(this, inode, xdata, max_lock); ++ ++ if (local->multiple_dom_lk_requests) ++ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock); + } + + /* Checks whether the region where fop is acting upon conflicts +@@ -773,9 +868,6 @@ pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + { + pl_local_t *local = frame->local; + +- if (local->xdata) +- dict_unref(local->xdata); +- + pl_track_io_fop_count(local, this, DECREMENT); + + if (local->op == GF_FOP_TRUNCATE) +@@ -932,9 +1024,6 @@ unwind: + "ret: %d, error: %s", + op_ret, strerror(op_errno)); + +- if (local->xdata) +- dict_unref(local->xdata); +- + switch (local->op) { + case GF_FOP_TRUNCATE: + PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf, +-- +1.8.3.1 + diff --git a/SOURCES/0425-tests-Fix-spurious-self-heald.t-failure.patch b/SOURCES/0425-tests-Fix-spurious-self-heald.t-failure.patch new file mode 100644 index 0000000..7bfc04a --- /dev/null +++ b/SOURCES/0425-tests-Fix-spurious-self-heald.t-failure.patch @@ -0,0 +1,187 @@ +From 2c582ea6c76031463501b31d9250e739d5aeda79 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Fri, 5 Jun 2020 14:28:11 +0530 +Subject: [PATCH 425/449] tests: Fix spurious self-heald.t failure + +Problem: +heal-info code assumes that all indices in the xattrop directory +definitely need heal. There is one corner case. +The very first xattrop on the file will lead to adding the +gfid to the 'xattrop' index in the fop path, and in the _cbk path it is +removed because the fop is a zero-xattr xattrop in the success case. +These gfids could be read by heal-info and shown as needing heal. + +Fix: +Check the pending flag to see if the file definitely needs heal or +not, instead of relying on which index is being crawled at the moment. 
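+
+A minimal sketch of the resulting decision (names as in this patch's
+afr_update_heal_status(); 'dirty_count_non_unary' abbreviates the
+afr_is_dirty_count_non_unary() call):
+
+    if (!pending) {                     /* no pending xattrs blame a brick */
+        if (dirty_count_non_unary || !io_domain_lk_count)
+            ret = 0;                    /* definitely needs heal */
+        else
+            *dsh = *esh = *msh = 0;     /* spurious index entry, no heal */
+    } else if (shd_domain_lk_count) {
+        ret = -EAGAIN;                  /* possibly healing */
+    } else {
+        ret = 0;                        /* needs heal */
+    }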
+ +> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24110/ +> fixes: bz#1801623 +> Change-Id: I79f00dc7366fedbbb25ec4bec838dba3b34c7ad5 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1721355 +Change-Id: I7efdf45a5158fadfdbdd21c91837f193d80fa6c7 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202491 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> +--- + heal/src/glfs-heal.c | 17 ++++++---------- + xlators/cluster/afr/src/afr-common.c | 38 ++++++++++++++---------------------- + 2 files changed, 21 insertions(+), 34 deletions(-) + +diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c +index 5af9e31..125b12c 100644 +--- a/heal/src/glfs-heal.c ++++ b/heal/src/glfs-heal.c +@@ -775,8 +775,7 @@ static int + glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + uint64_t *offset, num_entries_t *num_entries, + print_status glfsh_print_status, +- gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode, +- dict_t *xattr_req) ++ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode) + { + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; +@@ -808,7 +807,7 @@ glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries, + + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(loc.gfid, gfid); +- ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, xattr_req, NULL); ++ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL); + if (ret) { + if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN)) + goto out; +@@ -877,19 +876,19 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + if (heal_op == GF_SHD_OP_INDEX_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_heal_status, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_spb_status, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_print_summary_status, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) { +@@ -898,7 +897,7 @@ glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) { + ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset, + num_entries, glfsh_heal_status_boolean, +- ignore, mode, xattr_req); ++ ignore, mode); + if (ret < 0) + goto out; + } +@@ -952,10 +951,6 @@ glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + int32_t op_errno = 0; + gf_boolean_t ignore = _gf_false; + +- ret = dict_set_str(xattr_req, "index-vgfid", vgfid); +- if (ret) +- return ret; +- + if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID)) + ignore = _gf_true; + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index c355ec5..89e2483 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -5995,8 +5995,8 @@ afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies, + + static int + afr_update_heal_status(xlator_t *this, struct afr_reply *replies, +- char *index_vgfid, ia_type_t ia_type, gf_boolean_t 
*esh, +- gf_boolean_t *dsh, gf_boolean_t *msh) ++ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh, ++ gf_boolean_t *msh, unsigned char pending) + { + int ret = -1; + GF_UNUSED int ret1 = 0; +@@ -6026,14 +6026,7 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + } + } + +- if (!strcmp(index_vgfid, GF_XATTROP_INDEX_GFID)) { +- if (shd_domain_lk_count) { +- ret = -EAGAIN; /*For 'possibly-healing'. */ +- } else { +- ret = 0; /*needs heal. Just set a non -ve value so that it is +- assumed as the source index.*/ +- } +- } else if (!strcmp(index_vgfid, GF_XATTROP_DIRTY_GFID)) { ++ if (!pending) { + if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) || + (!io_domain_lk_count)) { + /* Needs heal. */ +@@ -6042,6 +6035,13 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + /* No heal needed. */ + *dsh = *esh = *msh = 0; + } ++ } else { ++ if (shd_domain_lk_count) { ++ ret = -EAGAIN; /*For 'possibly-healing'. */ ++ } else { ++ ret = 0; /*needs heal. Just set a non -ve value so that it is ++ assumed as the source index.*/ ++ } + } + return ret; + } +@@ -6049,8 +6049,8 @@ afr_update_heal_status(xlator_t *this, struct afr_reply *replies, + /*return EIO, EAGAIN or pending*/ + int + afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, +- inode_t **inode, char *index_vgfid, +- gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, ++ inode_t **inode, gf_boolean_t *entry_selfheal, ++ gf_boolean_t *data_selfheal, + gf_boolean_t *metadata_selfheal, unsigned char *pending) + { + int ret = -1; +@@ -6109,8 +6109,8 @@ afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, + goto out; + } + +- ret = afr_update_heal_status(this, replies, index_vgfid, (*inode)->ia_type, +- &esh, &dsh, &msh); ++ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh, ++ &msh, *pending); + out: + *data_selfheal = dsh; + *entry_selfheal = esh; +@@ -6133,16 +6133,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) + inode_t *inode = NULL; + char *substr = NULL; + char *status = NULL; +- afr_local_t *local = NULL; +- char *index_vgfid = NULL; +- +- local = frame->local; +- if (dict_get_str(local->xdata_req, "index-vgfid", &index_vgfid)) { +- ret = -1; +- goto out; +- } + +- ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, index_vgfid, ++ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, + &entry_selfheal, &data_selfheal, + &metadata_selfheal, &pending); + +-- +1.8.3.1 + diff --git a/SOURCES/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch b/SOURCES/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch new file mode 100644 index 0000000..a96b66e --- /dev/null +++ b/SOURCES/0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch @@ -0,0 +1,216 @@ +From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001 +From: Harpreet Kaur <hlalwani@redhat.com> +Date: Mon, 7 Jan 2019 16:38:25 +0530 +Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected" + issue + +problem: Geo-rep gsyncd process mounts the master and slave volume + on master nodes and slave nodes respectively and starts + the sync. But it doesn't wait for the mount to be in a ready + state to accept I/O. The gluster mount is considered to be + ready when all the distribute sub-volumes are up. If all + the distribute subvolumes are not up, it can cause an ENOTCONN + error when a lookup comes for a file that is on a subvol + that is down. 
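+
+(Illustration only, not part of this patch: once the virtual xattr
+described in the solution below exists, a client can probe mount
+readiness with a plain getxattr; xattr name and "1"/"0" semantics are
+taken from the patch, the rest is a hypothetical sketch:)
+
+    char val[16] = {0};
+    /* <sys/xattr.h>; "1" means all DHT subvolumes are up */
+    if (lgetxattr(".", "dht.subvol.status", val, sizeof(val)) > 0 &&
+        val[0] == '1') {
+        /* safe to start I/O */
+    }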
+ +solution: Added a Virtual Xattr "dht.subvol.status" which returns "1" + if all subvols are up and "0" if all subvols are not up. + Geo-rep then uses this virtual xattr after a fresh mount, to + check whether all subvols are up or not and then starts the + I/O. + +>fixes: bz#1664335 +>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 +>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com> +>Signed-off-by: Kotresh HR <khiremat@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/22001/ +BUG: 1640573 +Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202554 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/resource.py | 11 ++++++ + geo-replication/syncdaemon/syncdutils.py | 20 +++++++++-- + xlators/cluster/dht/src/dht-common.c | 59 ++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.h | 4 +++ + 4 files changed, 91 insertions(+), 3 deletions(-) + +diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py +index 189d8a1..0c61de9 100644 +--- a/geo-replication/syncdaemon/resource.py ++++ b/geo-replication/syncdaemon/resource.py +@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable + from syncdutils import get_changelog_log_level, get_rsync_version + from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION + from syncdutils import GX_GFID_CANONICAL_LEN ++from syncdutils import gf_mount_ready + from gsyncdstatus import GeorepStatus + from syncdutils import lf, Popen, sup + from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt +@@ -950,6 +951,16 @@ class Mounter(object): + logging.exception('mount cleanup failure:') + rv = 200 + os._exit(rv) ++ ++ #Polling the dht.subvol.status value. 
++ RETRIES = 10 ++ while not gf_mount_ready(): ++ if RETRIES < 0: ++ logging.error('Subvols are not up') ++ break ++ RETRIES -= 1 ++ time.sleep(0.2) ++ + logging.debug('auxiliary glusterfs mount prepared') + + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index b08098e..7560fa1 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -21,8 +21,8 @@ import subprocess + import socket + from subprocess import PIPE + from threading import Lock, Thread as baseThread +-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED +-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode ++from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED ++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode + from signal import signal, SIGTERM + import select as oselect + from os import waitpid as owaitpid +@@ -55,6 +55,8 @@ from rconf import rconf + + from hashlib import sha256 as sha256 + ++ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') ++ + # auxiliary gfid based access prefix + _CL_AUX_GFID_PFX = ".gfid/" + ROOT_GFID = "00000000-0000-0000-0000-000000000001" +@@ -100,6 +102,19 @@ def unescape_space_newline(s): + .replace(NEWLINE_ESCAPE_CHAR, "\n")\ + .replace(PERCENTAGE_ESCAPE_CHAR, "%") + ++# gf_mount_ready() returns 1 if all subvols are up, else 0 ++def gf_mount_ready(): ++ ret = errno_wrap(Xattr.lgetxattr, ++ ['.', 'dht.subvol.status', 16], ++ [ENOENT, ENOTSUP, ENODATA], [ENOMEM]) ++ ++ if isinstance(ret, int): ++ logging.error("failed to get the xattr value") ++ return 1 ++ ret = ret.rstrip('\x00') ++ if ret == "1": ++ return 1 ++ return 0 + + def norm(s): + if s: +@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]): + def lstat(e): + return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY]) + +- + def get_gfid_from_mnt(gfidpath): + return errno_wrap(Xattr.lgetxattr, + [gfidpath, 'glusterfs.gfid.string', +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 6aa18f3..23cc80c 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -4858,6 +4858,60 @@ out: + return 0; + } + ++/* Virtual Xattr which returns 1 if all subvols are up, ++ else returns 0. Geo-rep then uses this virtual xattr ++ after a fresh mount and starts the I/O. 
++*/ ++ ++enum dht_vxattr_subvol { ++ DHT_VXATTR_SUBVOLS_UP = 1, ++ DHT_VXATTR_SUBVOLS_DOWN = 0, ++}; ++ ++int ++dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this, ++ const char *key) ++{ ++ dht_local_t *local = NULL; ++ int ret = -1; ++ int op_errno = ENODATA; ++ int value = DHT_VXATTR_SUBVOLS_UP; ++ int i = 0; ++ dht_conf_t *conf = NULL; ++ ++ conf = this->private; ++ local = frame->local; ++ ++ if (!key) { ++ op_errno = EINVAL; ++ goto out; ++ } ++ local->xattr = dict_new(); ++ if (!local->xattr) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < conf->subvolume_cnt; i++) { ++ if (!conf->subvolume_status[i]) { ++ value = DHT_VXATTR_SUBVOLS_DOWN; ++ gf_msg_debug(this->name, 0, "subvol %s is down ", ++ conf->subvolumes[i]->name); ++ break; ++ } ++ } ++ ret = dict_set_int8(local->xattr, (char *)key, value); ++ if (ret < 0) { ++ op_errno = -ret; ++ ret = -1; ++ goto out; ++ } ++ ret = 0; ++ ++out: ++ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL); ++ return 0; ++} ++ + int + dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + dict_t *xdata) +@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + goto err; + } + ++ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) { ++ dht_vgetxattr_subvol_status(frame, this, key); ++ return 0; ++ } ++ + /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ + if (!DHT_IS_DIR(layout)) + goto no_dht_is_dir; +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 1b3e826..9ec5b51 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -45,6 +45,10 @@ + #define DHT_DIR_STAT_BLOCKS 8 + #define DHT_DIR_STAT_SIZE 4096 + ++/* Virtual xattr for subvols status */ ++ ++#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status" ++ + /* Virtual xattrs for debugging */ + + #define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*" +-- +1.8.3.1 + diff --git a/SOURCES/0427-storage-posix-Fixing-a-coverity-issue.patch b/SOURCES/0427-storage-posix-Fixing-a-coverity-issue.patch new file mode 100644 index 0000000..ebeb556 --- /dev/null +++ b/SOURCES/0427-storage-posix-Fixing-a-coverity-issue.patch @@ -0,0 +1,38 @@ +From 3943fce5818a353117fc1c492e6383434d742979 Mon Sep 17 00:00:00 2001 +From: Barak Sason <bsasonro@redhat.com> +Date: Sun, 18 Aug 2019 17:52:04 +0300 +Subject: [PATCH 427/449] storage/posix - Fixing a coverity issue + +Fixed a resource leak of variable 'pfd' + +backport of https://review.gluster.org/#/c/glusterfs/+/23261/ +>CID: 1400673 +>Updates: bz#789278 +>Change-Id: I78e1e8a89e0604b56e35a75c25d436b35db096c3 +>Signed-off-by: Barak Sason <bsasonro@redhat.com> + +BUG: 1787310 +Change-Id: I78e1e8a89e0604b56e35a75c25d436b35db096c3 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202563 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-inode-fd-ops.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index bcce06e..5748b9f 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1603,6 +1603,7 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + if (op_ret == -1) { + 
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); ++ GF_FREE(pfd); + goto out; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch b/SOURCES/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch new file mode 100644 index 0000000..dba8f3c --- /dev/null +++ b/SOURCES/0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch @@ -0,0 +1,48 @@ +From 7e1bf1e338a6effe209f57b1b92a70d5d25a73bf Mon Sep 17 00:00:00 2001 +From: Jiffin Tony Thottan <jthottan@redhat.com> +Date: Mon, 26 Aug 2019 11:32:18 +0530 +Subject: [PATCH 428/449] glusterd/ganesha: fixing resource leak in + tear_down_cluster() + +backport of https://review.gluster.org/#/c/glusterfs/+/23295/ +>CID: 1370947 +>Updates: bz#789278 +>Change-Id: Ib694056430ff0536ed705a0e77e5ace22486891e +>Signed-off-by: Jiffin Tony Thottan <jthottan@redhat.com> + +BUG: 1787310 +Change-Id: Ib694056430ff0536ed705a0e77e5ace22486891e +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202561 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 0a16925..06f028f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -659,10 +659,18 @@ tear_down_cluster(gf_boolean_t run_teardown) + "Failed to close dir %s. Reason :" + " %s", + CONFDIR, strerror(errno)); ++ goto exit; + } + } + + out: ++ if (dir && sys_closedir(dir)) { ++ gf_msg_debug(THIS->name, 0, ++ "Failed to close dir %s. Reason :" ++ " %s", ++ CONFDIR, strerror(errno)); ++ } ++exit: + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch b/SOURCES/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch new file mode 100644 index 0000000..8ac6529 --- /dev/null +++ b/SOURCES/0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch @@ -0,0 +1,61 @@ +From 1370db202a2a60810409f74c390448bf8fbd6998 Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Sun, 9 Feb 2020 15:09:30 +0200 +Subject: [PATCH 429/449] dht/rebalance - fixing failure occurace due to + rebalance stop + +Problem description: +When stopping rebalance, the following error messages appear in the +rebalance log file: +[2020-01-28 14:31:42.452070] W [dht-rebalance.c:3447:gf_defrag_process_dir] 0-distrep-dht: Found error from gf_defrag_get_entry +[2020-01-28 14:31:42.452764] E [MSGID: 109111] [dht-rebalance.c:3971:gf_defrag_fix_layout] 0-distrep-dht: gf_defrag_process_dir failed for directory: /0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30/31 +[2020-01-28 14:31:42.453498] E [MSGID: 109016] [dht-rebalance.c:3906:gf_defrag_fix_layout] 0-distrep-dht: Fix layout failed for /0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23/24/25/26/27/28/29/30 + +In order to avoid seeing these error messages, a modification to the +error handling mechanism has been made. 
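+
+Concretely, the directory crawl now checks for a stop request right
+after each recursive call and treats it as a clean exit rather than an
+error; a sketch of the pattern this patch applies at both call sites:
+
+    if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED)
+        goto out;    /* user-initiated stop: exit without logging errors */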
+In addition, several log messages had been added in order to improve debugging efficiency + +backport of https://review.gluster.org/#/c/glusterfs/+/24103/ +>fixes: bz#1800956 +>Change-Id: Ifc82dae79ab3da9fe22ee25088a2a6b855afcfcf +>Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> + +BUG: 1286171 +Change-Id: Ifc82dae79ab3da9fe22ee25088a2a6b855afcfcf +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202562 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 8f31dca..88b6b54 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -3479,6 +3479,10 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + migrate_data, dir_dfmeta, xattr_req, + &should_commit_hash, perrno); + ++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ goto out; ++ } ++ + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Found " +@@ -3935,6 +3939,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout, + migrate_data); + ++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) { ++ goto out; ++ } ++ + if (ret && ret != 2) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED, + "Fix layout failed for %s", entry_loc.path); +-- +1.8.3.1 + diff --git a/SOURCES/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch b/SOURCES/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch new file mode 100644 index 0000000..6ff69e8 --- /dev/null +++ b/SOURCES/0430-Fix-some-Null-pointer-dereference-coverity-issues.patch @@ -0,0 +1,291 @@ +From 7fe500a03d42dba6082c28ef7284c950c44fbfa3 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Wed, 22 May 2019 17:46:19 +0200 +Subject: [PATCH 430/449] Fix some "Null pointer dereference" coverity issues + +This patch fixes the following CID's: + + * 1124829 + * 1274075 + * 1274083 + * 1274128 + * 1274135 + * 1274141 + * 1274143 + * 1274197 + * 1274205 + * 1274210 + * 1274211 + * 1288801 + * 1398629 + +Backport of: +> Upstream-patch-link: https://review.gluster.org/22767 +> Change-Id: Ia7c86cfab3245b20777ffa296e1a59748040f558 +> Updates: bz#789278 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1787310 +Change-Id: Ia7c86cfab3245b20777ffa296e1a59748040f558 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202616 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-system.c | 2 +- + cli/src/cli-xml-output.c | 2 +- + glusterfsd/src/glusterfsd.c | 24 +++++++++++++----------- + libglusterfs/src/inode.c | 3 +++ + rpc/rpc-lib/src/rpcsvc.c | 4 ++++ + xlators/cluster/dht/src/dht-shared.c | 4 ++++ + xlators/cluster/dht/src/switch.c | 9 +++++++++ + xlators/features/trash/src/trash.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 7 +++++-- + xlators/nfs/server/src/mount3.c | 6 ++++++ + xlators/protocol/client/src/client.c | 7 ++++++- + xlators/storage/posix/src/posix-helpers.c | 3 +++ + 12 files changed, 56 insertions(+), 17 deletions(-) + +diff --git a/cli/src/cli-cmd-system.c 
b/cli/src/cli-cmd-system.c +index 8cd1542..cb3a9ea 100644 +--- a/cli/src/cli-cmd-system.c ++++ b/cli/src/cli-cmd-system.c +@@ -446,7 +446,7 @@ cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word, + dict_t *dict = NULL; + cli_local_t *local = NULL; + +- if (wordcount < 3) { ++ if ((wordcount < 3) || (words[2] == NULL)) { + cli_usage_out(word->pattern); + goto out; + } +diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c +index 006e2fb..903997c 100644 +--- a/cli/src/cli-xml-output.c ++++ b/cli/src/cli-xml-output.c +@@ -64,7 +64,7 @@ cli_begin_xml_output(xmlTextWriterPtr *writer, xmlDocPtr *doc) + int ret = -1; + + *writer = xmlNewTextWriterDoc(doc, 0); +- if (writer == NULL) { ++ if (*writer == NULL) { + ret = -1; + goto out; + } +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 974fb88..9821180 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1235,19 +1235,21 @@ parse_opts(int key, char *arg, struct argp_state *state) + case ARGP_BRICK_PORT_KEY: + n = 0; + +- port_str = strtok_r(arg, ",", &tmp_str); +- if (gf_string2uint_base10(port_str, &n) == 0) { +- cmd_args->brick_port = n; +- port_str = strtok_r(NULL, ",", &tmp_str); +- if (port_str) { +- if (gf_string2uint_base10(port_str, &n) == 0) { +- cmd_args->brick_port2 = n; +- break; ++ if (arg != NULL) { ++ port_str = strtok_r(arg, ",", &tmp_str); ++ if (gf_string2uint_base10(port_str, &n) == 0) { ++ cmd_args->brick_port = n; ++ port_str = strtok_r(NULL, ",", &tmp_str); ++ if (port_str) { ++ if (gf_string2uint_base10(port_str, &n) == 0) { ++ cmd_args->brick_port2 = n; ++ break; ++ } ++ argp_failure(state, -1, 0, ++ "wrong brick (listen) port %s", arg); + } +- argp_failure(state, -1, 0, "wrong brick (listen) port %s", +- arg); ++ break; + } +- break; + } + + argp_failure(state, -1, 0, "unknown brick (listen) port %s", arg); +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 9dbb25b..4c3c546 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -899,6 +899,9 @@ inode_resolve(inode_table_t *table, char *path) + + parent = inode_ref(table->root); + str = tmp = gf_strdup(path); ++ if (str == NULL) { ++ goto out; ++ } + + while (1) { + bname = strtok_r(str, "/", &saveptr); +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 5a35139..b058932 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -2874,6 +2874,10 @@ rpcsvc_transport_peer_check_search(dict_t *options, char *pattern, char *ip, + } + + dup_addrstr = gf_strdup(addrstr); ++ if (dup_addrstr == NULL) { ++ ret = -1; ++ goto err; ++ } + addrtok = strtok_r(dup_addrstr, ",", &svptr); + while (addrtok) { + /* CASEFOLD not present on Solaris */ +diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c +index ea4b7c6..58e3339 100644 +--- a/xlators/cluster/dht/src/dht-shared.c ++++ b/xlators/cluster/dht/src/dht-shared.c +@@ -278,6 +278,10 @@ dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf, + goto out; + + dup_brick = gf_strdup(bricks); ++ if (dup_brick == NULL) { ++ goto out; ++ } ++ + node = strtok_r(dup_brick, ",", &tmpstr); + while (node) { + for (i = 0; i < conf->subvolume_cnt; i++) { +diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c +index a782fcd..207d109 100644 +--- a/xlators/cluster/dht/src/switch.c ++++ b/xlators/cluster/dht/src/switch.c +@@ -610,9 +610,15 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str) 
+ /* Get the pattern for considering switch case. + "option block-size *avi:10MB" etc */ + option_string = gf_strdup(pattern_str); ++ if (option_string == NULL) { ++ goto err; ++ } + switch_str = strtok_r(option_string, ";", &tmp_str); + while (switch_str) { + dup_str = gf_strdup(switch_str); ++ if (dup_str == NULL) { ++ goto err; ++ } + switch_opt = GF_CALLOC(1, sizeof(struct switch_struct), + gf_switch_mt_switch_struct); + if (!switch_opt) { +@@ -647,6 +653,9 @@ set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str) + + if (childs) { + dup_childs = gf_strdup(childs); ++ if (dup_childs == NULL) { ++ goto err; ++ } + child = strtok_r(dup_childs, ",", &tmp); + while (child) { + if (gf_switch_valid_child(this, child)) { +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index d668436..f96ed73 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -170,7 +170,7 @@ store_eliminate_path(char *str, trash_elim_path **eliminate) + int ret = 0; + char *strtokptr = NULL; + +- if (eliminate == NULL) { ++ if ((str == NULL) || (eliminate == NULL)) { + ret = EINVAL; + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index 0f40bea..85c06c1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -5981,7 +5981,7 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname, + GF_ASSERT(this); + + ret = glusterd_urltransform_single(slave, "normalize", &linearr); +- if (ret == -1) { ++ if ((ret == -1) || (linearr[0] == NULL)) { + ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave); + errmsg[ret] = '\0'; + *op_errstr = gf_strdup(errmsg); +@@ -5992,7 +5992,10 @@ glusterd_get_slave_info(char *slave, char **slave_url, char **hostname, + + tmp = strtok_r(linearr[0], "/", &save_ptr); + tmp = strtok_r(NULL, "/", &save_ptr); +- slave = strtok_r(tmp, ":", &save_ptr); ++ slave = NULL; ++ if (tmp != NULL) { ++ slave = strtok_r(tmp, ":", &save_ptr); ++ } + if (slave) { + ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr); + if (ret) { +diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c +index 396809c..734453c 100644 +--- a/xlators/nfs/server/src/mount3.c ++++ b/xlators/nfs/server/src/mount3.c +@@ -3205,6 +3205,12 @@ mnt3_export_parse_auth_param(struct mnt3_export *exp, char *exportpath) + struct host_auth_spec *host = NULL; + int ret = 0; + ++ if (exportpath == NULL) { ++ gf_msg(GF_MNT, GF_LOG_ERROR, EINVAL, NFS_MSG_PARSE_HOSTSPEC_FAIL, ++ "Export path is NULL"); ++ return -1; ++ } ++ + /* Using exportpath directly in strtok_r because we want + * to strip off AUTH parameter from exportpath. 
*/ + token = strtok_r(exportpath, "(", &savPtr); +diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c +index e156d4d..ed855ca 100644 +--- a/xlators/protocol/client/src/client.c ++++ b/xlators/protocol/client/src/client.c +@@ -1222,9 +1222,12 @@ client_set_remote_options(char *value, xlator_t *this) + char *remote_port_str = NULL; + char *tmp = NULL; + int remote_port = 0; +- int ret = 0; ++ int ret = -1; + + dup_value = gf_strdup(value); ++ if (dup_value == NULL) { ++ goto out; ++ } + host = strtok_r(dup_value, ":", &tmp); + subvol = strtok_r(NULL, ":", &tmp); + remote_port_str = strtok_r(NULL, ":", &tmp); +@@ -1238,6 +1241,7 @@ client_set_remote_options(char *value, xlator_t *this) + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED, + "failed to set remote-host with %s", host); ++ GF_FREE(host_dup); + goto out; + } + } +@@ -1252,6 +1256,7 @@ client_set_remote_options(char *value, xlator_t *this) + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_DICT_SET_FAILED, + "failed to set remote-host with %s", host); ++ GF_FREE(subvol_dup); + goto out; + } + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 949c799..2336add 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -390,6 +390,9 @@ _posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key) + int i = 0, ret = 0; + + tmp_key = ptr = gf_strdup(key); ++ if (tmp_key == NULL) { ++ return -1; ++ } + for (i = 0; i < 4; i++) { + token = strtok_r(tmp_key, ".", &saveptr); + tmp_key = NULL; +-- +1.8.3.1 + diff --git a/SOURCES/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch b/SOURCES/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch new file mode 100644 index 0000000..341cfc1 --- /dev/null +++ b/SOURCES/0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch @@ -0,0 +1,638 @@ +From d7c52ddd2cbadb1d9a55767c2f7fe6ba38d9a2ed Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Wed, 20 Nov 2019 12:42:12 +0530 +Subject: [PATCH 431/449] glusterd: check for same node while adding bricks in + disperse volume + +The optimal way for configuring disperse and replicate volumes +is to have all bricks in different nodes. + +During create operation it fails saying it is not optimal, user +must use force to over-ride this behavior. Implementing same +during add-brick operation to avoid situation where all the added +bricks end up from same host. Operation will error out accordingly. +and this can be over-ridden by using force same as create. 
+ +> Upstream Patch Link: https://review.gluster.org/#/c/glusterfs/+/23729 +> fixes: #1047 +> Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b +> Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> + +BUG: 1524457 +Change-Id: I3ee9c97c1a14b73f4532893bc00187ef9355238b +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202621 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 20 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 224 ++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-utils.h | 4 + + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 293 +++--------------------- + 4 files changed, 276 insertions(+), 265 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index c5141de..d424f31 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -21,7 +21,6 @@ + #include "glusterd-messages.h" + #include "glusterd-server-quorum.h" + #include <glusterfs/run.h> +-#include "glusterd-volgen.h" + #include <glusterfs/syscall.h> + #include <sys/signal.h> + +@@ -1575,6 +1574,25 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + is_force = dict_get_str_boolean(dict, "force", _gf_false); + ++ /* Check brick order if the volume type is replicate or disperse. If ++ * force at the end of command not given then check brick order. ++ */ ++ ++ if (!is_force) { ++ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || ++ (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not adding brick because of " ++ "bad brick order. 
%s", ++ msg); ++ *op_errstr = gf_strdup(msg); ++ goto out; ++ } ++ } ++ } ++ + if (volinfo->replica_count < replica_count && !is_force) { + cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) + { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index a1299bc..14e23d1 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14759,3 +14759,227 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo) + return _gf_true; + return _gf_false; + } ++ ++static gf_ai_compare_t ++glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) ++{ ++ int ret = -1; ++ struct addrinfo *tmp1 = NULL; ++ struct addrinfo *tmp2 = NULL; ++ char firstip[NI_MAXHOST] = {0.}; ++ char nextip[NI_MAXHOST] = { ++ 0, ++ }; ++ ++ for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { ++ ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, ++ NULL, 0, NI_NUMERICHOST); ++ if (ret) ++ return GF_AI_COMPARE_ERROR; ++ for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { ++ ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, ++ NI_MAXHOST, NULL, 0, NI_NUMERICHOST); ++ if (ret) ++ return GF_AI_COMPARE_ERROR; ++ if (!strcmp(firstip, nextip)) { ++ return GF_AI_COMPARE_MATCH; ++ } ++ } ++ } ++ return GF_AI_COMPARE_NO_MATCH; ++} ++ ++/* Check for non optimal brick order for Replicate/Disperse : ++ * Checks if bricks belonging to a replicate or disperse ++ * volume are present on the same server ++ */ ++int32_t ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) ++{ ++ int ret = -1; ++ int i = 0; ++ int j = 0; ++ int k = 0; ++ xlator_t *this = NULL; ++ addrinfo_list_t *ai_list = NULL; ++ addrinfo_list_t *ai_list_tmp1 = NULL; ++ addrinfo_list_t *ai_list_tmp2 = NULL; ++ char *brick = NULL; ++ char *brick_list = NULL; ++ char *brick_list_dup = NULL; ++ char *brick_list_ptr = NULL; ++ char *tmpptr = NULL; ++ char *volname = NULL; ++ int32_t brick_count = 0; ++ int32_t sub_count = 0; ++ struct addrinfo *ai_info = NULL; ++ char brick_addr[128] = { ++ 0, ++ }; ++ int addrlen = 0; ++ ++ const char failed_string[2048] = ++ "Failed to perform brick order " ++ "check. Use 'force' at the end of the command" ++ " if you want to override this behavior. "; ++ const char found_string[2048] = ++ "Multiple bricks of a %s " ++ "volume are present on the same server. This " ++ "setup is not optimal. Bricks should be on " ++ "different nodes to have best fault tolerant " ++ "configuration. Use 'force' at the end of the " ++ "command if you want to override this " ++ "behavior. 
"; ++ ++ this = THIS; ++ ++ GF_ASSERT(this); ++ ++ ai_list = MALLOC(sizeof(addrinfo_list_t)); ++ ai_list->info = NULL; ++ CDS_INIT_LIST_HEAD(&ai_list->list); ++ ++ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Unable to get volume name"); ++ goto out; ++ } ++ ++ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could not " ++ "retrieve bricks list"); ++ goto out; ++ } ++ ++ ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could not " ++ "retrieve brick count"); ++ goto out; ++ } ++ ++ if (type != GF_CLUSTER_TYPE_DISPERSE) { ++ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), ++ &sub_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve replica count"); ++ goto out; ++ } ++ gf_msg_debug(this->name, 0, ++ "Replicate cluster type " ++ "found. Checking brick order."); ++ } else { ++ ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), ++ &sub_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve disperse count"); ++ goto out; ++ } ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, ++ "Disperse cluster type" ++ " found. Checking brick order."); ++ } ++ brick_list_dup = brick_list_ptr = gf_strdup(brick_list); ++ /* Resolve hostnames and get addrinfo */ ++ while (i < brick_count) { ++ ++i; ++ brick = strtok_r(brick_list_dup, " \n", &tmpptr); ++ brick_list_dup = tmpptr; ++ if (brick == NULL) ++ goto check_failed; ++ tmpptr = strrchr(brick, ':'); ++ if (tmpptr == NULL) ++ goto check_failed; ++ addrlen = strlen(brick) - strlen(tmpptr); ++ strncpy(brick_addr, brick, addrlen); ++ brick_addr[addrlen] = '\0'; ++ ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); ++ if (ret != 0) { ++ ret = 0; ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, ++ "unable to resolve host name for addr %s", brick_addr); ++ goto out; ++ } ++ ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); ++ if (ai_list_tmp1 == NULL) { ++ ret = 0; ++ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "failed to allocate " ++ "memory"); ++ freeaddrinfo(ai_info); ++ goto out; ++ } ++ ai_list_tmp1->info = ai_info; ++ cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); ++ ai_list_tmp1 = NULL; ++ } ++ ++ i = 0; ++ ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); ++ ++ /* Check for bad brick order */ ++ while (i < brick_count) { ++ ++i; ++ ai_info = ai_list_tmp1->info; ++ ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, ++ list); ++ if (0 == i % sub_count) { ++ j = 0; ++ continue; ++ } ++ ai_list_tmp2 = ai_list_tmp1; ++ k = j; ++ while (k < sub_count - 1) { ++ ++k; ++ ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); ++ if (GF_AI_COMPARE_ERROR == ret) ++ goto check_failed; ++ if (GF_AI_COMPARE_MATCH == ret) ++ goto found_bad_brick_order; ++ ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, ++ addrinfo_list_t, list); ++ } ++ ++j; ++ } ++ gf_msg_debug(this->name, 0, "Brick order okay"); ++ ret = 0; ++ goto out; ++ ++check_failed: ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, ++ "Failed bad brick order 
check"); ++ snprintf(err_str, sizeof(failed_string), failed_string); ++ ret = -1; ++ goto out; ++ ++found_bad_brick_order: ++ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, ++ "Bad brick order found"); ++ if (type == GF_CLUSTER_TYPE_DISPERSE) { ++ snprintf(err_str, sizeof(found_string), found_string, "disperse"); ++ } else { ++ snprintf(err_str, sizeof(found_string), found_string, "replicate"); ++ } ++ ++ ret = -1; ++out: ++ ai_list_tmp2 = NULL; ++ GF_FREE(brick_list_ptr); ++ cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) ++ { ++ if (ai_list_tmp1->info) ++ freeaddrinfo(ai_list_tmp1->info); ++ free(ai_list_tmp2); ++ ai_list_tmp2 = ai_list_tmp1; ++ } ++ free(ai_list_tmp2); ++ return ret; ++} +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index ead16b2..e2e2454 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -881,4 +881,8 @@ glusterd_is_profile_on(glusterd_volinfo_t *volinfo); + + char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); ++ ++int32_t ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type); ++ + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 93042ab..8da2ff3 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -41,240 +41,6 @@ + #define glusterd_op_start_volume_args_get(dict, volname, flags) \ + glusterd_op_stop_volume_args_get(dict, volname, flags) + +-gf_ai_compare_t +-glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) +-{ +- int ret = -1; +- struct addrinfo *tmp1 = NULL; +- struct addrinfo *tmp2 = NULL; +- char firstip[NI_MAXHOST] = {0.}; +- char nextip[NI_MAXHOST] = { +- 0, +- }; +- +- for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { +- ret = getnameinfo(tmp1->ai_addr, tmp1->ai_addrlen, firstip, NI_MAXHOST, +- NULL, 0, NI_NUMERICHOST); +- if (ret) +- return GF_AI_COMPARE_ERROR; +- for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { +- ret = getnameinfo(tmp2->ai_addr, tmp2->ai_addrlen, nextip, +- NI_MAXHOST, NULL, 0, NI_NUMERICHOST); +- if (ret) +- return GF_AI_COMPARE_ERROR; +- if (!strcmp(firstip, nextip)) { +- return GF_AI_COMPARE_MATCH; +- } +- } +- } +- return GF_AI_COMPARE_NO_MATCH; +-} +- +-/* Check for non optimal brick order for replicate : +- * Checks if bricks belonging to a replicate volume +- * are present on the same server +- */ +-int32_t +-glusterd_check_brick_order(dict_t *dict, char *err_str) +-{ +- int ret = -1; +- int i = 0; +- int j = 0; +- int k = 0; +- xlator_t *this = NULL; +- addrinfo_list_t *ai_list = NULL; +- addrinfo_list_t *ai_list_tmp1 = NULL; +- addrinfo_list_t *ai_list_tmp2 = NULL; +- char *brick = NULL; +- char *brick_list = NULL; +- char *brick_list_dup = NULL; +- char *brick_list_ptr = NULL; +- char *tmpptr = NULL; +- char *volname = NULL; +- int32_t brick_count = 0; +- int32_t type = GF_CLUSTER_TYPE_NONE; +- int32_t sub_count = 0; +- struct addrinfo *ai_info = NULL; +- char brick_addr[128] = { +- 0, +- }; +- int addrlen = 0; +- +- const char failed_string[2048] = +- "Failed to perform brick order " +- "check. Use 'force' at the end of the command" +- " if you want to override this behavior. "; +- const char found_string[2048] = +- "Multiple bricks of a %s " +- "volume are present on the same server. This " +- "setup is not optimal. 
Bricks should be on " +- "different nodes to have best fault tolerant " +- "configuration. Use 'force' at the end of the " +- "command if you want to override this " +- "behavior. "; +- +- this = THIS; +- +- GF_ASSERT(this); +- +- ai_list = MALLOC(sizeof(addrinfo_list_t)); +- ai_list->info = NULL; +- CDS_INIT_LIST_HEAD(&ai_list->list); +- +- ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Unable to get volume name"); +- goto out; +- } +- +- ret = dict_get_int32n(dict, "type", SLEN("type"), &type); +- if (ret) { +- snprintf(err_str, 512, "Unable to get type of volume %s", volname); +- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", +- err_str); +- goto out; +- } +- +- ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &brick_list); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could not " +- "retrieve bricks list"); +- goto out; +- } +- +- ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could not " +- "retrieve brick count"); +- goto out; +- } +- +- if (type != GF_CLUSTER_TYPE_DISPERSE) { +- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve replica count"); +- goto out; +- } +- gf_msg_debug(this->name, 0, +- "Replicate cluster type " +- "found. Checking brick order."); +- } else { +- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve disperse count"); +- goto out; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, +- "Disperse cluster type" +- " found. 
Checking brick order."); +- } +- +- brick_list_dup = brick_list_ptr = gf_strdup(brick_list); +- /* Resolve hostnames and get addrinfo */ +- while (i < brick_count) { +- ++i; +- brick = strtok_r(brick_list_dup, " \n", &tmpptr); +- brick_list_dup = tmpptr; +- if (brick == NULL) +- goto check_failed; +- tmpptr = strrchr(brick, ':'); +- if (tmpptr == NULL) +- goto check_failed; +- addrlen = strlen(brick) - strlen(tmpptr); +- strncpy(brick_addr, brick, addrlen); +- brick_addr[addrlen] = '\0'; +- ret = getaddrinfo(brick_addr, NULL, NULL, &ai_info); +- if (ret != 0) { +- ret = 0; +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL, +- "unable to resolve host name for addr %s", brick_addr); +- goto out; +- } +- ai_list_tmp1 = MALLOC(sizeof(addrinfo_list_t)); +- if (ai_list_tmp1 == NULL) { +- ret = 0; +- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, +- "failed to allocate " +- "memory"); +- freeaddrinfo(ai_info); +- goto out; +- } +- ai_list_tmp1->info = ai_info; +- cds_list_add_tail(&ai_list_tmp1->list, &ai_list->list); +- ai_list_tmp1 = NULL; +- } +- +- i = 0; +- ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); +- +- /* Check for bad brick order */ +- while (i < brick_count) { +- ++i; +- ai_info = ai_list_tmp1->info; +- ai_list_tmp1 = cds_list_entry(ai_list_tmp1->list.next, addrinfo_list_t, +- list); +- if (0 == i % sub_count) { +- j = 0; +- continue; +- } +- ai_list_tmp2 = ai_list_tmp1; +- k = j; +- while (k < sub_count - 1) { +- ++k; +- ret = glusterd_compare_addrinfo(ai_info, ai_list_tmp2->info); +- if (GF_AI_COMPARE_ERROR == ret) +- goto check_failed; +- if (GF_AI_COMPARE_MATCH == ret) +- goto found_bad_brick_order; +- ai_list_tmp2 = cds_list_entry(ai_list_tmp2->list.next, +- addrinfo_list_t, list); +- } +- ++j; +- } +- gf_msg_debug(this->name, 0, "Brick order okay"); +- ret = 0; +- goto out; +- +-check_failed: +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER_CHECK_FAIL, +- "Failed bad brick order check"); +- snprintf(err_str, sizeof(failed_string), failed_string); +- ret = -1; +- goto out; +- +-found_bad_brick_order: +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BAD_BRKORDER, +- "Bad brick order found"); +- if (type == GF_CLUSTER_TYPE_DISPERSE) { +- snprintf(err_str, sizeof(found_string), found_string, "disperse"); +- } else { +- snprintf(err_str, sizeof(found_string), found_string, "replicate"); +- } +- +- ret = -1; +-out: +- ai_list_tmp2 = NULL; +- GF_FREE(brick_list_ptr); +- cds_list_for_each_entry(ai_list_tmp1, &ai_list->list, list) +- { +- if (ai_list_tmp1->info) +- freeaddrinfo(ai_list_tmp1->info); +- free(ai_list_tmp2); +- ai_list_tmp2 = ai_list_tmp1; +- } +- free(ai_list_tmp2); +- return ret; +-} +- + int + __glusterd_handle_create_volume(rpcsvc_request_t *req) + { +@@ -1337,6 +1103,35 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + } + } + ++ /*Check brick order if the volume type is replicate or disperse. If ++ * force at the end of command not given then check brick order. 
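++ * The check runs only on the originating glusterd (the
++ * is_origin_glusterd() branch below) and is skipped entirely when
++ * 'force' was supplied, mirroring the add-brick staging path earlier
++ * in this patch.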
++ */ ++ if (is_origin_glusterd(dict)) { ++ ret = dict_get_int32n(dict, "type", SLEN("type"), &type); ++ if (ret) { ++ snprintf(msg, sizeof(msg), ++ "Unable to get type of " ++ "volume %s", ++ volname); ++ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", ++ msg); ++ goto out; ++ } ++ ++ if (!is_force) { ++ if ((type == GF_CLUSTER_TYPE_REPLICATE) || ++ (type == GF_CLUSTER_TYPE_DISPERSE)) { ++ ret = glusterd_check_brick_order(dict, msg, type); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not creating volume because of " ++ "bad brick order"); ++ goto out; ++ } ++ } ++ } ++ } ++ + while (i < brick_count) { + i++; + brick = strtok_r(brick_list, " \n", &tmpptr); +@@ -1423,36 +1218,6 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + brick_info = NULL; + } + +- /*Check brick order if the volume type is replicate or disperse. If +- * force at the end of command not given then check brick order. +- */ +- if (is_origin_glusterd(dict)) { +- ret = dict_get_int32n(dict, "type", SLEN("type"), &type); +- if (ret) { +- snprintf(msg, sizeof(msg), +- "Unable to get type of " +- "volume %s", +- volname); +- gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED, "%s", +- msg); +- goto out; +- } +- +- if (!is_force) { +- if ((type == GF_CLUSTER_TYPE_REPLICATE) || +- (type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = glusterd_check_brick_order(dict, msg); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, +- "Not " +- "creating volume because of " +- "bad brick order"); +- goto out; +- } +- } +- } +- } +- + ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), + local_brick_count); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch b/SOURCES/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch new file mode 100644 index 0000000..ef589de --- /dev/null +++ b/SOURCES/0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch @@ -0,0 +1,503 @@ +From aa215163cb7d806dc98bef2386a4e282a5e54a31 Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Thu, 25 Apr 2019 12:00:52 +0530 +Subject: [PATCH 432/449] glusterd: Fix coverity defects & put coverity + annotations + +Along with fixing few defect, put the required annotations for the defects which +are marked ignore/false positive/intentional as per the coverity defect sheet. +This should avoid the per component graph showing many defects as open in the +coverity glusterfs web page. 
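+
+For reference, an annotation of this kind is a structured comment
+placed directly above the flagged statement; it tells the scanner the
+finding is intentional or safe. The example below is taken verbatim
+from one of the hunks in this patch:
+
+    /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+    tmp_fd = mkstemp(tmpvol);
+
+Real defects, such as unchecked return values and leaked allocations,
+are fixed outright rather than annotated.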
+ +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22619/ +> Updates: bz#789278 +> Change-Id: I19461dc3603a3bd8f88866a1ab3db43d783af8e4 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1787310 +Change-Id: I19461dc3603a3bd8f88866a1ab3db43d783af8e4 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202631 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 7 +++-- + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 3 +- + .../glusterd/src/glusterd-gfproxyd-svc-helper.c | 2 +- + xlators/mgmt/glusterd/src/glusterd-handler.c | 8 ++++- + xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 5 ++- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 8 +++++ + xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 2 ++ + xlators/mgmt/glusterd/src/glusterd-server-quorum.c | 1 + + xlators/mgmt/glusterd/src/glusterd-store.c | 4 --- + xlators/mgmt/glusterd/src/glusterd-svc-helper.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 + + .../mgmt/glusterd/src/glusterd-tierd-svc-helper.c | 4 +-- + xlators/mgmt/glusterd/src/glusterd-utils.c | 9 ++++-- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 36 +++++++++++++--------- + 14 files changed, 63 insertions(+), 31 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index d424f31..121346c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -2032,7 +2032,6 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + case GF_OP_CMD_STATUS: + ret = 0; + goto out; +- + case GF_OP_CMD_DETACH_START: + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + snprintf(msg, sizeof(msg), +@@ -2044,7 +2043,7 @@ glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr) + errstr); + goto out; + } +- ++ /* Fall through */ + case GF_OP_CMD_START: { + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + dict_getn(dict, "replica-count", SLEN("replica-count"))) { +@@ -2259,7 +2258,8 @@ out: + if (op_errstr) + *op_errstr = errstr; + } +- ++ if (!op_errstr && errstr) ++ GF_FREE(errstr); + return ret; + } + +@@ -2687,6 +2687,7 @@ glusterd_op_remove_brick(dict_t *dict, char **op_errstr) + * Update defrag_cmd as well or it will only be done + * for nodes on which the brick to be removed exists. 
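+ * The assignment below stores a GF_OP_CMD_* command value into a
+ * field typed for the defrag-command enum; the coverity[MIXED_ENUMS]
+ * annotation added before it marks that deliberate cross-enum
+ * assignment as intentional so the scanner stops flagging it.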
+ */ ++ /* coverity[MIXED_ENUMS] */ + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED; + ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY, +diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +index 85c06c1..5a91df4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c ++++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +@@ -4107,6 +4107,7 @@ gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave, + + out: + sys_close(pfd); ++ /* coverity[INTEGER_OVERFLOW] */ + return ret; + } + +@@ -4183,7 +4184,7 @@ stop_gsync(char *master, char *slave, char **msg, char *conf_path, + + out: + sys_close(pfd); +- ++ /* coverity[INTEGER_OVERFLOW] */ + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c +index 67e3f41..e338bf4 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c +@@ -111,7 +111,7 @@ glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(*tmpvol); + if (tmp_fd < 0) { + gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 2e73c98..1f31e72 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -930,6 +930,7 @@ __glusterd_handle_cluster_lock(rpcsvc_request_t *req) + + op_ctx = dict_new(); + if (!op_ctx) { ++ ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL, + "Unable to set new dict"); + goto out; +@@ -956,6 +957,9 @@ out: + glusterd_friend_sm(); + glusterd_op_sm(); + ++ if (ret) ++ GF_FREE(ctx); ++ + return ret; + } + +@@ -3470,6 +3474,7 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options, + GF_ASSERT(this); + + GF_ASSERT(options); ++ GF_VALIDATE_OR_GOTO(this->name, rpc, out); + + if (force && rpc && *rpc) { + (void)rpc_clnt_unref(*rpc); +@@ -3482,7 +3487,6 @@ glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options, + goto out; + + ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data); +- *rpc = new_rpc; + if (ret) + goto out; + ret = rpc_clnt_start(new_rpc); +@@ -3491,6 +3495,8 @@ out: + if (new_rpc) { + (void)rpc_clnt_unref(new_rpc); + } ++ } else { ++ *rpc = new_rpc; + } + + gf_msg_debug(this->name, 0, "returning %d", ret); +diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +index 332ddef..c017ccb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +@@ -334,7 +334,10 @@ make_ghadoop_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user, + if (ret == -1) + return ret; + +- return parse_mount_pattern_desc(mspec, hadoop_mnt_desc); ++ ret = parse_mount_pattern_desc(mspec, hadoop_mnt_desc); ++ GF_FREE(hadoop_mnt_desc); ++ ++ return ret; + } + + static gf_boolean_t +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 6475611..46fc607 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2467,6 +2467,7 @@ 
glusterd_start_bricks(glusterd_volinfo_t *volinfo) + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { ++ /* coverity[SLEEP] */ + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } +@@ -3466,6 +3467,7 @@ _add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) + + switch (op) { + case GD_OP_REMOVE_TIER_BRICK: ++ /* Fall through */ + case GD_OP_REMOVE_BRICK: + snprintf(key, sizeof(key), "task%d", index); + ret = _add_remove_bricks_to_dict(dict, volinfo, key); +@@ -7550,6 +7552,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + glusterd_op_t op = GD_OP_NONE; + glusterd_req_ctx_t *req_ctx = NULL; + char *op_errstr = NULL; ++ gf_boolean_t free_req_ctx = _gf_false; + + this = THIS; + priv = this->private; +@@ -7558,6 +7561,9 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + req_ctx = ctx; + } else { + req_ctx = GF_CALLOC(1, sizeof(*req_ctx), gf_gld_mt_op_allack_ctx_t); ++ if (!req_ctx) ++ goto out; ++ free_req_ctx = _gf_true; + op = glusterd_op_get_op(); + req_ctx->op = op; + gf_uuid_copy(req_ctx->uuid, MY_UUID); +@@ -7588,6 +7594,8 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + } + + out: ++ if (ret && req_ctx && free_req_ctx) ++ GF_FREE(req_ctx); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + + return ret; +diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +index 8c1feeb..1a65359 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +@@ -82,6 +82,7 @@ glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo) + call_rcu(&peerinfo->rcu_head.head, glusterd_peerinfo_destroy); + + if (quorum_action) ++ /* coverity[SLEEP] */ + glusterd_do_quorum_action(); + return 0; + } +@@ -358,6 +359,7 @@ glusterd_uuid_to_hostname(uuid_t uuid) + + if (!gf_uuid_compare(MY_UUID, uuid)) { + hostname = gf_strdup("localhost"); ++ return hostname; + } + RCU_READ_LOCK; + if (!cds_list_empty(&priv->peers)) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +index fd334e6..f378187 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c ++++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c +@@ -372,6 +372,7 @@ glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo, + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { ++ /* coverity[SLEEP] */ + ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c +index b3b5ee9..4fa8116 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-store.c ++++ b/xlators/mgmt/glusterd/src/glusterd-store.c +@@ -4764,10 +4764,6 @@ glusterd_store_retrieve_peers(xlator_t *this) + */ + address = cds_list_entry(peerinfo->hostnames.next, + glusterd_peer_hostname_t, hostname_list); +- if (!address) { +- ret = -1; +- goto next; +- } + peerinfo->hostname = gf_strdup(address->hostname); + + ret = glusterd_friend_add_from_peerinfo(peerinfo, 1, NULL); +diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +index ca19a75..1d1f42d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c +@@ -179,7 +179,7 @@ 
glusterd_svc_check_volfile_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +@@ -241,7 +241,7 @@ glusterd_svc_check_topology_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 618d8bc..9e47d14 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1752,6 +1752,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + if (dict_get(op_ctx, "client-count")) + break; + } ++ /* coverity[MIXED_ENUMS] */ + } else if (cmd == GF_OP_CMD_DETACH_START) { + op = GD_OP_REMOVE_BRICK; + dict_del(req_dict, "rebalance-command"); +diff --git a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c +index 922eae7..59843a0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c ++++ b/xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c +@@ -116,7 +116,7 @@ glusterd_svc_check_tier_volfile_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmp_fd = mkstemp(tmpvol); + if (tmp_fd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +@@ -177,7 +177,7 @@ glusterd_svc_check_tier_topology_identical(char *svc_name, + goto out; + } + +- /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */ ++ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */ + tmpfd = mkstemp(tmpvol); + if (tmpfd < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 14e23d1..8b0fc9a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -908,6 +908,7 @@ glusterd_create_sub_tier_volinfo(glusterd_volinfo_t *volinfo, + (*dup_volinfo)->brick_count = tier_info->cold_brick_count; + } + out: ++ /* coverity[REVERSE_NULL] */ + if (ret && *dup_volinfo) { + glusterd_volinfo_delete(*dup_volinfo); + *dup_volinfo = NULL; +@@ -2738,6 +2739,7 @@ glusterd_readin_file(const char *filepath, int *line_count) + /* Reduce allocation to minimal size. 
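+ If this shrinking reallocation fails, the original lines array is
+ still valid and is released through free_lines(); the
+ coverity[TAINTED_SCALAR] annotation added below only silences a
+ scanner false positive on the line counter and does not change
+ behavior.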
*/ + p = GF_REALLOC(lines, (counter + 1) * sizeof(char *)); + if (!p) { ++ /* coverity[TAINTED_SCALAR] */ + free_lines(lines, counter); + lines = NULL; + goto out; +@@ -6782,6 +6784,7 @@ glusterd_restart_bricks(void *opaque) + if (!brickinfo->start_triggered) { + pthread_mutex_lock(&brickinfo->restart_mutex); + { ++ /* coverity[SLEEP] */ + glusterd_brick_start(volinfo, brickinfo, _gf_false, + _gf_false); + } +@@ -8886,7 +8889,7 @@ glusterd_nfs_statedump(char *options, int option_cnt, char **op_errstr) + kill(pid, SIGUSR1); + + sleep(1); +- ++ /* coverity[TAINTED_STRING] */ + sys_unlink(dumpoptions_path); + ret = 0; + out: +@@ -9012,6 +9015,7 @@ glusterd_quotad_statedump(char *options, int option_cnt, char **op_errstr) + + sleep(1); + ++ /* coverity[TAINTED_STRING] */ + sys_unlink(dumpoptions_path); + ret = 0; + out: +@@ -13423,7 +13427,7 @@ glusterd_get_global_options_for_all_vols(rpcsvc_request_t *req, dict_t *ctx, + if (key_fixed) + key = key_fixed; + } +- ++ /* coverity[CONSTANT_EXPRESSION_RESULT] */ + ALL_VOLUME_OPTION_CHECK("all", _gf_true, key, ret, op_errstr, out); + + for (i = 0; valid_all_vol_opts[i].option; i++) { +@@ -14153,6 +14157,7 @@ glusterd_disallow_op_for_tier(glusterd_volinfo_t *volinfo, glusterd_op_t op, + break; + case GD_OP_REMOVE_BRICK: + switch (cmd) { ++ /* coverity[MIXED_ENUMS] */ + case GF_DEFRAG_CMD_DETACH_START: + case GF_OP_CMD_DETACH_COMMIT_FORCE: + case GF_OP_CMD_DETACH_COMMIT: +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 539e8a5..6852f8e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -322,7 +322,7 @@ volopt_trie_cbk(char *word, void *param) + } + + static int +-process_nodevec(struct trienodevec *nodevec, char **hint) ++process_nodevec(struct trienodevec *nodevec, char **outputhint, char *inputhint) + { + int ret = 0; + char *hint1 = NULL; +@@ -331,14 +331,14 @@ process_nodevec(struct trienodevec *nodevec, char **hint) + trienode_t **nodes = nodevec->nodes; + + if (!nodes[0]) { +- *hint = NULL; ++ *outputhint = NULL; + return 0; + } + + #if 0 + /* Limit as in git */ + if (trienode_get_dist (nodes[0]) >= 6) { +- *hint = NULL; ++ *outputhint = NULL; + return 0; + } + #endif +@@ -347,23 +347,30 @@ process_nodevec(struct trienodevec *nodevec, char **hint) + return -1; + + if (nodevec->cnt < 2 || !nodes[1]) { +- *hint = hint1; ++ *outputhint = hint1; + return 0; + } + +- if (trienode_get_word(nodes[1], &hint2)) ++ if (trienode_get_word(nodes[1], &hint2)) { ++ GF_FREE(hint1); + return -1; ++ } + +- if (*hint) +- hintinfx = *hint; +- ret = gf_asprintf(hint, "%s or %s%s", hint1, hintinfx, hint2); ++ if (inputhint) ++ hintinfx = inputhint; ++ ret = gf_asprintf(outputhint, "%s or %s%s", hint1, hintinfx, hint2); + if (ret > 0) + ret = 0; ++ if (hint1) ++ GF_FREE(hint1); ++ if (hint2) ++ GF_FREE(hint2); + return ret; + } + + static int +-volopt_trie_section(int lvl, char **patt, char *word, char **hint, int hints) ++volopt_trie_section(int lvl, char **patt, char *word, char **outputhint, ++ char *inputhint, int hints) + { + trienode_t *nodes[] = {NULL, NULL}; + struct trienodevec nodevec = {nodes, 2}; +@@ -384,7 +391,7 @@ volopt_trie_section(int lvl, char **patt, char *word, char **hint, int hints) + nodevec.cnt = hints; + ret = trie_measure_vec(trie, word, &nodevec); + if (!ret && nodevec.nodes[0]) +- ret = process_nodevec(&nodevec, hint); ++ ret = process_nodevec(&nodevec, outputhint, inputhint); + + trie_destroy(trie); + +@@ -396,6 
+403,7 @@ volopt_trie(char *key, char **hint) + { + char *patt[] = {NULL}; + char *fullhint = NULL; ++ char *inputhint = NULL; + char *dot = NULL; + char *dom = NULL; + int len = 0; +@@ -405,7 +413,7 @@ volopt_trie(char *key, char **hint) + + dot = strchr(key, '.'); + if (!dot) +- return volopt_trie_section(1, patt, key, hint, 2); ++ return volopt_trie_section(1, patt, key, hint, inputhint, 2); + + len = dot - key; + dom = gf_strdup(key); +@@ -413,7 +421,7 @@ volopt_trie(char *key, char **hint) + return -1; + dom[len] = '\0'; + +- ret = volopt_trie_section(0, NULL, dom, patt, 1); ++ ret = volopt_trie_section(0, NULL, dom, patt, inputhint, 1); + GF_FREE(dom); + if (ret) { + patt[0] = NULL; +@@ -422,8 +430,8 @@ volopt_trie(char *key, char **hint) + if (!patt[0]) + goto out; + +- *hint = "..."; +- ret = volopt_trie_section(1, patt, dot + 1, hint, 2); ++ inputhint = "..."; ++ ret = volopt_trie_section(1, patt, dot + 1, hint, inputhint, 2); + if (ret) + goto out; + if (*hint) { +-- +1.8.3.1 + diff --git a/SOURCES/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch b/SOURCES/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch new file mode 100644 index 0000000..d980eb5 --- /dev/null +++ b/SOURCES/0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch @@ -0,0 +1,54 @@ +From 0e3871a57b7a621444dc5cfd49935a1e412f6436 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Mon, 8 Jun 2020 13:27:50 +0530 +Subject: [PATCH 433/449] socket: Resolve ssl_ctx leak for a brick while only + mgmt SSL is enabled + +Problem: While only mgmt SSL is enabled for a brick process use_ssl flag + is false for a brick process and socket api's cleanup ssl_ctx only + while use_ssl and ssl_ctx both are valid + +Solution: To avoid a leak check only ssl_ctx, if it is valid cleanup + ssl_ctx + +> Fixes: #1196 +> Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit 9873baee34afdf0c20f5fc98a7dbf2a9f07447e2) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24366/) + +BUG: 1810924 +Change-Id: I2f4295478f4149dcb7d608ea78ee5104f28812c3 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202625 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 226b2e2..54cd5df 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -1163,7 +1163,7 @@ __socket_reset(rpc_transport_t *this) + SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +@@ -4685,7 +4685,7 @@ fini(rpc_transport_t *this) + SSL_free(priv->ssl_ssl); + priv->ssl_ssl = NULL; + } +- if (priv->use_ssl && priv->ssl_ctx) { ++ if (priv->ssl_ctx) { + SSL_CTX_free(priv->ssl_ctx); + priv->ssl_ctx = NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch b/SOURCES/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch new file mode 100644 index 0000000..3f038a3 --- /dev/null +++ b/SOURCES/0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch @@ -0,0 +1,39 @@ +From 2ea3fc203671429d0aa9994e5bbd57f6a604523d Mon 
Sep 17 00:00:00 2001 +From: Xie Changlong <xiechanglong@cmss.chinamobile.com> +Date: Mon, 28 Oct 2019 17:43:28 +0800 +Subject: [PATCH 434/449] glusterd/ganesha: fix Coverity CID 1405785 + +To avoid double free + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23630/ +> updates: bz#789278 +> Change-Id: I15ae54ed696295d4cb015668722e77983b062ccb +> Signed-off-by: Xie Changlong <xiechanglong@cmss.chinamobile.com> + +BUG: 1787310 +Change-Id: I15ae54ed696295d4cb015668722e77983b062ccb +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202623 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-ganesha.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +index 06f028f..caba34f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-ganesha.c ++++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c +@@ -659,8 +659,8 @@ tear_down_cluster(gf_boolean_t run_teardown) + "Failed to close dir %s. Reason :" + " %s", + CONFDIR, strerror(errno)); +- goto exit; + } ++ goto exit; + } + + out: +-- +1.8.3.1 + diff --git a/SOURCES/0435-glusterd-coverity-fix.patch b/SOURCES/0435-glusterd-coverity-fix.patch new file mode 100644 index 0000000..f587107 --- /dev/null +++ b/SOURCES/0435-glusterd-coverity-fix.patch @@ -0,0 +1,38 @@ +From 9425fd5a49a17a8f91c13632ae055a6510b0b44c Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Fri, 17 May 2019 14:27:58 +0530 +Subject: [PATCH 435/449] glusterd: coverity fix + +CID: 1401345 - Unused value + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22737/ +> updates: bz#789278 +> Change-Id: I6b8f2611151ce0174042384b7632019c312ebae3 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1787310 +Change-Id: I6b8f2611151ce0174042384b7632019c312ebae3 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202622 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 8b0fc9a..2eb2a76 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3740,7 +3740,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + * deleting a volume, so we shouldn't be trying to create a + * fresh volume here which would lead to a stale entry + */ +- if (stage_deleted == 0) ++ if (!ret && stage_deleted == 0) + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + ret = 0; + goto out; +-- +1.8.3.1 + diff --git a/SOURCES/0436-glusterd-coverity-fixes.patch b/SOURCES/0436-glusterd-coverity-fixes.patch new file mode 100644 index 0000000..799681f --- /dev/null +++ b/SOURCES/0436-glusterd-coverity-fixes.patch @@ -0,0 +1,187 @@ +From 179213798496448316547506da65dbd9fd741dfa Mon Sep 17 00:00:00 2001 +From: Atin Mukherjee <amukherj@redhat.com> +Date: Wed, 24 Apr 2019 22:02:51 +0530 +Subject: [PATCH 436/449] glusterd: coverity fixes + +Addresses the following: + +* CID 1124776: Resource leaks (RESOURCE_LEAK) - Variable "aa" going out +of scope leaks the storage it points to in glusterd-volgen.c + +* Bunch of 
CHECKED_RETURN defects in the callers of synctask_barrier_init + +* CID 1400742: Program hangs (LOCK) - adding annotation to fix this +false positive + +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22615 +> Updates: bz#789278 +> Change-Id: I02f16e7eeb8c5cf72f7d0b29d00df4f03b3718b3 +> Signed-off-by: Atin Mukherjee <amukherj@redhat.com> + +BUG: 1787310 +Change-Id: I02f16e7eeb8c5cf72f7d0b29d00df4f03b3718b3 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202626 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-handler.c | 6 ++++++ + xlators/mgmt/glusterd/src/glusterd-mgmt.c | 24 +++++++++++++++++++----- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 22 ++++++++++++++++++---- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 5 +++-- + 4 files changed, 46 insertions(+), 11 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index 1f31e72..b8799ab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -3458,6 +3458,12 @@ glusterd_friend_remove(uuid_t uuid, char *hostname) + ret = glusterd_peerinfo_cleanup(peerinfo); + out: + gf_msg_debug(THIS->name, 0, "returning %d", ret); ++ /* We don't need to do a mutex unlock of peerinfo->delete_lock as the same ++ * will be anyway destroyed within glusterd_peerinfo_cleanup, coverity ++ * though cries about it ++ */ ++ /* coverity[LOCK] */ ++ + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +index a4915f3..1e185d7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c ++++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c +@@ -757,7 +757,10 @@ glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict, + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init(&args, NULL); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1108,7 +1111,10 @@ glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict, + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init(&args, req_dict); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1458,7 +1464,10 @@ glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1722,7 +1731,9 @@ glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init(&args, op_ctx); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1963,7 +1974,10 @@ glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict, + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init(&args, req_dict); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +diff --git 
a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index 9e47d14..c78983a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1191,7 +1191,12 @@ gd_lock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + struct syncargs args = {0}; + + this = THIS; +- synctask_barrier_init((&args)); ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1321,7 +1326,10 @@ stage_done: + } + + gd_syncargs_init(&args, aggr_dict); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + RCU_READ_LOCK; +@@ -1449,7 +1457,10 @@ commit_done: + } + + gd_syncargs_init(&args, op_ctx); +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + origin_glusterd = is_origin_glusterd(req_dict); + +@@ -1541,7 +1552,10 @@ gd_unlock_op_phase(glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, + goto out; + } + +- synctask_barrier_init((&args)); ++ ret = synctask_barrier_init((&args)); ++ if (ret) ++ goto out; ++ + peer_cnt = 0; + + if (cluster_lock) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 6852f8e..16346e7 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4808,9 +4808,10 @@ nfs_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + + if (ret != -1) { + ret = gf_canonicalize_path(vme->value); +- if (ret) ++ if (ret) { ++ GF_FREE(aa); + return -1; +- ++ } + ret = xlator_set_option(xl, aa, ret, vme->value); + GF_FREE(aa); + } +-- +1.8.3.1 + diff --git a/SOURCES/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch b/SOURCES/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch new file mode 100644 index 0000000..21fcd8a --- /dev/null +++ b/SOURCES/0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch @@ -0,0 +1,48 @@ +From ffd428d07036531b7ed98c7393b87490aaa223ec Mon Sep 17 00:00:00 2001 +From: Niels de Vos <ndevos@redhat.com> +Date: Fri, 3 May 2019 09:18:31 +0200 +Subject: [PATCH 437/449] glusterd: prevent use-after-free in + glusterd_op_ac_send_brick_op() + +Coverity reported that GF_FREE(req_ctx) could be called 2x on req_ctx. 
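+
+In outline (a trimmed sketch of the flaw, not the full function), the
+build-payload error path freed req_ctx and then jumped to the out
+label, where the same pointer could be freed a second time:
+
+    if (ret) {
+        GF_FREE(req_ctx);            /* first free */
+        goto out;
+    }
+
+out:
+    if (ret && free_req_ctx)
+        GF_FREE(req_ctx);            /* same pointer freed again */
+
+The fix drops the inner GF_FREE() and leaves the single, guarded free
+at the out label.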
+ +> upstream patch link: https://review.gluster.org/#/c/glusterfs/+/22656/ +> Change-Id: I9120686e5920de8c27688e10de0db6aa26292064 +> CID: 1401115 +> Updates: bz#789278 +> Signed-off-by: Niels de Vos <ndevos@redhat.com> + +BUG: 1787310 +Change-Id: I9120686e5920de8c27688e10de0db6aa26292064 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202619 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 46fc607..1e84f5f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -7575,7 +7575,6 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + if (op_errstr == NULL) + gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; +- GF_FREE(req_ctx); + goto out; + } + } +@@ -7594,7 +7593,7 @@ glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx) + } + + out: +- if (ret && req_ctx && free_req_ctx) ++ if (ret && free_req_ctx) + GF_FREE(req_ctx); + gf_msg_debug(this->name, 0, "Returning with %d", ret); + +-- +1.8.3.1 + diff --git a/SOURCES/0438-dht-sparse-files-rebalance-enhancements.patch b/SOURCES/0438-dht-sparse-files-rebalance-enhancements.patch new file mode 100644 index 0000000..6e10ce6 --- /dev/null +++ b/SOURCES/0438-dht-sparse-files-rebalance-enhancements.patch @@ -0,0 +1,324 @@ +From 7b2f1bd4e5a57ea3abd5f14a7d81b120735faecd Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Wed, 6 May 2020 13:28:40 +0300 +Subject: [PATCH 438/449] dht - sparse files rebalance enhancements + +Currently data migration in rebalance reads sparse file sequentially, +disregarding which segments are holes and which are data. This can lead +to extremely long migration time for large sparse file. +Data migration mechanism needs to be enhanced so only data segments are +read and migrated. This can be achieved using lseek to seek for holes +and data in the file. +This enhancement is a consequence of +https://bugzilla.redhat.com/show_bug.cgi?id=1823703 + +> fixes: #1222 +> Change-Id: If5f448a0c532926464e1f34f504c5c94749b08c3 +> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +> (Cherry pick from commit 7b7559733ca0c25c63f9d56cb7f4650dbd694c40) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24409/) + +BUG: 1836099 +Change-Id: If5f448a0c532926464e1f34f504c5c94749b08c3 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202647 +Reviewed-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/distribute/spare_file_rebalance.t | 51 ++++++++ + xlators/cluster/dht/src/dht-rebalance.c | 172 ++++++++++++-------------- + 2 files changed, 130 insertions(+), 93 deletions(-) + create mode 100644 tests/basic/distribute/spare_file_rebalance.t + +diff --git a/tests/basic/distribute/spare_file_rebalance.t b/tests/basic/distribute/spare_file_rebalance.t +new file mode 100644 +index 0000000..061c02f +--- /dev/null ++++ b/tests/basic/distribute/spare_file_rebalance.t +@@ -0,0 +1,51 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../dht.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; ++ ++# Verify volume creation ++EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++# Start volume and verify successful start ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; ++ ++#------------------------------------------------------------ ++ ++# Test case - Create sparse files on MP and verify ++# file info after rebalance ++#------------------------------------------------------------ ++ ++# Create some sparse files and get their size ++TEST cd $M0; ++dd if=/dev/urandom of=sparse_file bs=10k count=1 seek=2M ++cp --sparse=always sparse_file sparse_file_3; ++ ++# Add a 3rd brick ++TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3; ++ ++# Trigger rebalance ++TEST $CLI volume rebalance $V0 start force; ++EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed; ++ ++# Compare original and rebalanced files ++TEST cd $B0/${V0}2 ++TEST cmp sparse_file $B0/${V0}3/sparse_file_3 ++EXPECT_WITHIN 30 ""; ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 88b6b54..d0c21b4 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -18,8 +18,8 @@ + #include <glusterfs/events.h> + + #define GF_DISK_SECTOR_SIZE 512 +-#define DHT_REBALANCE_PID 4242 /* Change it if required */ +-#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ ++#define DHT_REBALANCE_PID 4242 /* Change it if required */ ++#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */ + #define MAX_MIGRATE_QUEUE_COUNT 500 + #define MIN_MIGRATE_QUEUE_COUNT 200 + #define MAX_REBAL_TYPE_SIZE 16 +@@ -178,75 +178,6 @@ dht_strip_out_acls(dict_t *dict) + } + } + +-static int +-dht_write_with_holes(xlator_t *to, fd_t *fd, struct iovec *vec, int count, +- int32_t size, off_t offset, struct iobref *iobref, +- int *fop_errno) +-{ +- int i = 0; +- int ret = -1; +- int start_idx = 0; +- int tmp_offset = 0; +- int write_needed = 0; +- int buf_len = 0; +- int size_pending = 0; +- char *buf = NULL; +- +- /* loop through each vector */ +- for (i = 0; i < count; i++) { +- buf = vec[i].iov_base; +- buf_len = vec[i].iov_len; +- +- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len; +- start_idx += GF_DISK_SECTOR_SIZE) { +- if (mem_0filled(buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) { +- write_needed = 1; +- continue; +- } +- +- if (write_needed) { +- ret = syncop_write( +- to, fd, (buf + tmp_offset), (start_idx - tmp_offset), +- (offset + tmp_offset), iobref, 0, NULL, NULL); +- /* 'path' will be logged in calling function */ +- if (ret < 0) { +- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", +- strerror(-ret)); +- *fop_errno = -ret; +- ret = -1; +- goto out; +- } +- +- write_needed = 0; +- } +- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE; +- } +- +- if ((start_idx < buf_len) || write_needed) { +- /* This means, last chunk is not yet written.. 
write it */ +- ret = syncop_write(to, fd, (buf + tmp_offset), +- (buf_len - tmp_offset), (offset + tmp_offset), +- iobref, 0, NULL, NULL); +- if (ret < 0) { +- /* 'path' will be logged in calling function */ +- gf_log(THIS->name, GF_LOG_WARNING, "failed to write (%s)", +- strerror(-ret)); +- *fop_errno = -ret; +- ret = -1; +- goto out; +- } +- } +- +- size_pending = (size - buf_len); +- if (!size_pending) +- break; +- } +- +- ret = size; +-out: +- return ret; +-} +- + /* + return values: + -1 : failure +@@ -1101,32 +1032,97 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + int ret = 0; + int count = 0; + off_t offset = 0; ++ off_t data_offset = 0; ++ off_t hole_offset = 0; + struct iovec *vector = NULL; + struct iobref *iobref = NULL; + uint64_t total = 0; + size_t read_size = 0; ++ size_t data_block_size = 0; + dict_t *xdata = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; ++ + /* if file size is '0', no need to enter this loop */ + while (total < ia_size) { +- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) +- ? DHT_REBALANCE_BLKSIZE +- : (ia_size - total)); ++ /* This is a regular file - read it sequentially */ ++ if (!hole_exists) { ++ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ++ ? DHT_REBALANCE_BLKSIZE ++ : (ia_size - total)); ++ } else { ++ /* This is a sparse file - read only the data segments in the file ++ */ ++ ++ /* If the previous data block is fully copied, find the next data ++ * segment ++ * starting at the offset of the last read and written byte, */ ++ if (data_block_size <= 0) { ++ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL, ++ &data_offset); ++ if (ret) { ++ if (ret == -ENXIO) ++ ret = 0; /* No more data segments */ ++ else ++ *fop_errno = -ret; /* Error occurred */ ++ ++ break; ++ } ++ ++ /* If the position of the current data segment is greater than ++ * the position of the next hole, find the next hole in order to ++ * calculate the length of the new data segment */ ++ if (data_offset > hole_offset) { ++ /* Starting at the offset of the last data segment, find the ++ * next hole */ ++ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE, ++ NULL, &hole_offset); ++ if (ret) { ++ /* If an error occurred here it's a real error because ++ * if the seek for a data segment was successful then ++ * necessarily another hole must exist (EOF is a hole) ++ */ ++ *fop_errno = -ret; ++ break; ++ } ++ ++ /* Calculate the total size of the current data block */ ++ data_block_size = hole_offset - data_offset; ++ } ++ } else { ++ /* There is still data in the current segment, move the ++ * data_offset to the position of the last written byte */ ++ data_offset = offset; ++ } ++ ++ /* Calculate how much data needs to be read and written. If the data ++ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and ++ * write DHT_REBALANCE_BLKSIZE data length and the rest in the ++ * next iteration(s) */ ++ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE) ++ ? 
DHT_REBALANCE_BLKSIZE ++ : data_block_size); ++ ++ /* Calculate the remaining size of the data block - maybe there's no ++ * need to seek for data in the next iteration */ ++ data_block_size -= read_size; ++ ++ /* Set offset to the offset of the data segment so read and write ++ * will have the correct position */ ++ offset = data_offset; ++ } + + ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count, + &iobref, NULL, NULL, NULL); ++ + if (!ret || (ret < 0)) { + *fop_errno = -ret; + break; + } + +- if (hole_exists) { +- ret = dht_write_with_holes(to, dst, vector, count, ret, offset, +- iobref, fop_errno); +- } else { +- if (!conf->force_migration && !dht_is_tier_xlator(this)) { ++ if (!conf->force_migration && !dht_is_tier_xlator(this)) { ++ if (!xdata) { + xdata = dict_new(); + if (!xdata) { + gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED, +@@ -1146,7 +1142,7 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + * https://github.com/gluster/glusterfs/issues/308 + * for more details. + */ +- ret = dict_set_int32(xdata, GF_AVOID_OVERWRITE, 1); ++ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1); + if (ret) { + gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM, + "failed to set dict"); +@@ -1155,22 +1151,12 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + } +- +- ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, +- NULL, xdata, NULL); +- if (ret < 0) { +- *fop_errno = -ret; +- } +- } +- +- if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && +- (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { +- gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, +- "Migrate file paused"); +- ret = -1; + } + ++ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL, ++ NULL, xdata, NULL); + if (ret < 0) { ++ *fop_errno = -ret; + break; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0439-cluster-afr-Delay-post-op-for-fsync.patch b/SOURCES/0439-cluster-afr-Delay-post-op-for-fsync.patch new file mode 100644 index 0000000..dc1593b --- /dev/null +++ b/SOURCES/0439-cluster-afr-Delay-post-op-for-fsync.patch @@ -0,0 +1,438 @@ +From 3ed98fc9dcb39223032e343fd5b0ad17fa3cae14 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Fri, 29 May 2020 14:24:53 +0530 +Subject: [PATCH 439/449] cluster/afr: Delay post-op for fsync + +Problem: +AFR doesn't delay post-op for fsync fop. For fsync heavy workloads +this leads to un-necessary fxattrop/finodelk for every fsync leading +to bad performance. + +Fix: +Have delayed post-op for fsync. Add special flag in xdata to indicate +that afr shouldn't delay post-op in cases where either the +process will terminate or graph-switch would happen. Otherwise it leads +to un-necessary heals when the graph-switch/process-termination +happens before delayed-post-op completes. 
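The caller-side half of the fix is visible in the hunks below: every fsync issued on a path that is about to be torn down (gfapi fd migration, DHT rebalance hand-off, FUSE graph switch) passes a "last-fsync" hint in xdata so AFR skips the delayed post-op for that one call. A minimal sketch of the pattern, assuming the libglusterfs dict/syncop declarations are in scope and that this, subvol and fd are valid:

    /* Sketch only -- mirrors the hunks below; error handling is
     * reduced to a warning, exactly as the patch does. */
    int ret = 0;
    dict_t *xdata = dict_new();

    if (!xdata || dict_set_int8(xdata, "last-fsync", 1))
        gf_log(this->name, GF_LOG_WARNING,
               "failed to set last-fsync hint");

    ret = syncop_fsync(subvol, fd, 0, NULL, NULL, xdata, NULL);

    if (xdata)
        dict_unref(xdata);

Note that the hint is advisory: if setting it fails, the fsync still proceeds and only the optimization is lost, which is why the patch logs a warning instead of aborting.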
+ +> Upstream-patch: https://review.gluster.org/c/glusterfs/+/24473 +> Fixes: #1253 + +BUG: 1838479 +Change-Id: I531940d13269a111c49e0510d49514dc169f4577 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202676 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + api/src/glfs-resolve.c | 14 ++- + tests/basic/afr/durability-off.t | 2 + + tests/basic/gfapi/gfapi-graph-switch-open-fd.t | 44 +++++++++ + tests/basic/gfapi/gfapi-keep-writing.c | 129 +++++++++++++++++++++++++ + xlators/cluster/afr/src/afr-inode-write.c | 11 ++- + xlators/cluster/afr/src/afr-transaction.c | 9 +- + xlators/cluster/afr/src/afr.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 15 ++- + xlators/mount/fuse/src/fuse-bridge.c | 23 ++++- + 9 files changed, 239 insertions(+), 10 deletions(-) + create mode 100644 tests/basic/gfapi/gfapi-graph-switch-open-fd.t + create mode 100644 tests/basic/gfapi/gfapi-keep-writing.c + +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index a79f490..062b7dc 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -722,6 +722,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + 0, + }; + char uuid1[64]; ++ dict_t *xdata = NULL; + + oldinode = oldfd->inode; + oldsubvol = oldinode->table->xl; +@@ -730,7 +731,15 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd) + return fd_ref(oldfd); + + if (!oldsubvol->switched) { +- ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_msg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED, ++ "last-fsync set failed on %s graph %s (%d)", ++ uuid_utoa_r(oldfd->inode->gfid, uuid1), ++ graphid_str(oldsubvol), oldsubvol->graph->id); ++ } ++ ++ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL); + DECODE_SYNCOP_ERR(ret); + if (ret) { + gf_msg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED, +@@ -809,6 +818,9 @@ out: + newfd = NULL; + } + ++ if (xdata) ++ dict_unref(xdata); ++ + return newfd; + } + +diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t +index 155ffa0..6e0f18b 100644 +--- a/tests/basic/afr/durability-off.t ++++ b/tests/basic/afr/durability-off.t +@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0 + EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) + ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + #Test that fsyncs happen when durability is on + TEST $CLI volume set $V0 cluster.ensure-durability on + TEST $CLI volume set $V0 performance.strict-write-ordering on +diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +new file mode 100644 +index 0000000..2e666be +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t +@@ -0,0 +1,44 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. 
$(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2}; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++TEST touch $M0/sync ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi ++ ++ ++#Launch a program to keep doing writes on an fd ++./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync & ++p=$! ++sleep 1 #Let some writes go through ++#Check if graph switch will lead to any pending markers for ever ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++ ++TEST rm -f $M0/sync #Make sure the glfd is closed ++TEST wait #Wait for background process to die ++#Goal is to check if there is permanent FOOL changelog ++sleep 5 ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty ++EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty ++ ++cleanup_tester $(dirname $0)/gfapi-async-calls-test ++ ++cleanup; +diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c +new file mode 100644 +index 0000000..91b59ce +--- /dev/null ++++ b/tests/basic/gfapi/gfapi-keep-writing.c +@@ -0,0 +1,129 @@ ++#include <fcntl.h> ++#include <unistd.h> ++#include <time.h> ++#include <limits.h> ++#include <string.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <errno.h> ++#include <glusterfs/api/glfs.h> ++#include <glusterfs/api/glfs-handles.h> ++ ++#define LOG_ERR(msg) \ ++ do { \ ++ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \ ++ } while (0) ++ ++glfs_t * ++init_glfs(const char *hostname, const char *volname, const char *logfile) ++{ ++ int ret = -1; ++ glfs_t *fs = NULL; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ LOG_ERR("glfs_new failed"); ++ return NULL; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_volfile_server failed"); ++ goto out; ++ } ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ if (ret < 0) { ++ LOG_ERR("glfs_set_logging failed"); ++ goto out; ++ } ++ ++ ret = glfs_init(fs); ++ if (ret < 0) { ++ LOG_ERR("glfs_init failed"); ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ if (ret) { ++ glfs_fini(fs); ++ fs = NULL; ++ } ++ ++ return fs; ++} ++ ++int ++glfs_test_function(const char *hostname, const char *volname, ++ const char *logfile, const char *syncfile) ++{ ++ int ret = -1; ++ int flags = O_CREAT | O_RDWR; ++ glfs_t *fs = NULL; ++ glfs_fd_t *glfd = NULL; ++ const char *buff = "This is from my prog\n"; ++ const char *filename = "glfs_test.txt"; ++ struct stat buf = {0}; ++ ++ fs = init_glfs(hostname, volname, logfile); ++ if (fs == NULL) { ++ LOG_ERR("init_glfs failed"); ++ return -1; ++ } ++ ++ glfd = glfs_creat(fs, filename, flags, 0644); ++ if (glfd == NULL) { ++ LOG_ERR("glfs_creat failed"); ++ goto out; ++ } ++ ++ while (glfs_stat(fs, syncfile, &buf) == 0) { ++ ret = glfs_write(glfd, buff, strlen(buff), flags); ++ if (ret < 0) { ++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ } ++ ++ ret = glfs_close(glfd); ++ if (ret < 0) { 
++ LOG_ERR("glfs_write failed"); ++ goto out; ++ } ++ ++out: ++ ret = glfs_fini(fs); ++ if (ret) { ++ LOG_ERR("glfs_fini failed"); ++ } ++ ++ return ret; ++} ++ ++int ++main(int argc, char *argv[]) ++{ ++ int ret = 0; ++ char *hostname = NULL; ++ char *volname = NULL; ++ char *logfile = NULL; ++ char *syncfile = NULL; ++ ++ if (argc != 5) { ++ fprintf(stderr, "Invalid argument\n"); ++ exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ syncfile = argv[4]; ++ ++ ret = glfs_test_function(hostname, volname, logfile, syncfile); ++ if (ret) { ++ LOG_ERR("glfs_test_function failed"); ++ } ++ ++ return ret; ++} +diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c +index 7fcc9d4..df82b6e 100644 +--- a/xlators/cluster/afr/src/afr-inode-write.c ++++ b/xlators/cluster/afr/src/afr-inode-write.c +@@ -2492,6 +2492,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + call_frame_t *transaction_frame = NULL; + int ret = -1; + int32_t op_errno = ENOMEM; ++ int8_t last_fsync = 0; + + transaction_frame = copy_frame(frame); + if (!transaction_frame) +@@ -2501,10 +2502,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + if (!local) + goto out; + +- if (xdata) ++ if (xdata) { + local->xdata_req = dict_copy_with_ref(xdata, NULL); +- else ++ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) { ++ if (last_fsync) { ++ local->transaction.disable_delayed_post_op = _gf_true; ++ } ++ } ++ } else { + local->xdata_req = dict_new(); ++ } + + if (!local->xdata_req) + goto out; +diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c +index 8e65ae2..ffd0ab8 100644 +--- a/xlators/cluster/afr/src/afr-transaction.c ++++ b/xlators/cluster/afr/src/afr-transaction.c +@@ -2385,8 +2385,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this, + goto out; + } + +- if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) { +- /*Only allow writes but shard does [f]xattrops on writes, so ++ if (local->transaction.disable_delayed_post_op) { ++ goto out; ++ } ++ ++ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) && ++ (local->op != GF_FOP_FSYNC)) { ++ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so + * they are fine too*/ + goto out; + } +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 18f1a6a..ff96246 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -854,7 +854,7 @@ typedef struct _afr_local { + + int (*unwind)(call_frame_t *frame, xlator_t *this); + +- /* post-op hook */ ++ gf_boolean_t disable_delayed_post_op; + } transaction; + + syncbarrier_t barrier; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index d0c21b4..e9974cd 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1550,6 +1550,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + xlator_t *old_target = NULL; + xlator_t *hashed_subvol = NULL; + fd_t *linkto_fd = NULL; ++ dict_t *xdata = NULL; + + if (from == to) { + gf_msg_debug(this->name, 0, +@@ -1868,7 +1869,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, + + /* TODO: Sync the locks */ + +- ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL); ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ 
gf_log(this->name, GF_LOG_ERROR, ++ "%s: failed to set last-fsync flag on " ++ "%s (%s)", ++ loc->path, to->name, strerror(ENOMEM)); ++ } ++ ++ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)", + loc->path, to->name, strerror(-ret)); +@@ -2342,11 +2351,15 @@ out: + + if (dst_fd) + syncop_close(dst_fd); ++ + if (src_fd) + syncop_close(src_fd); + if (linkto_fd) + syncop_close(linkto_fd); + ++ if (xdata) ++ dict_unref(xdata); ++ + loc_wipe(&tmp_loc); + loc_wipe(&parent_loc); + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index fdeec49..4264fad 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -5559,6 +5559,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + char create_in_progress = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + fd_t *oldfd = NULL; ++ dict_t *xdata = NULL; + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); +@@ -5595,10 +5596,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + } + + if (oldfd->inode->table->xl == old_subvol) { +- if (IA_ISDIR(oldfd->inode->ia_type)) ++ if (IA_ISDIR(oldfd->inode->ia_type)) { + ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL); +- else +- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL); ++ } else { ++ xdata = dict_new(); ++ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { ++ gf_log("glusterfs-fuse", GF_LOG_WARNING, ++ "last-fsync set failed (%s) on fd (%p)" ++ "(basefd:%p basefd-inode.gfid:%s) " ++ "(old-subvolume:%s-%d new-subvolume:%s-%d)", ++ strerror(ENOMEM), oldfd, basefd, ++ uuid_utoa(basefd->inode->gfid), old_subvol->name, ++ old_subvol->graph->id, new_subvol->name, ++ new_subvol->graph->id); ++ } ++ ++ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL); ++ } + + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, +@@ -5653,6 +5667,9 @@ out: + + fd_unref(oldfd); + ++ if (xdata) ++ dict_unref(xdata); ++ + return ret; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch b/SOURCES/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch new file mode 100644 index 0000000..a7c1869 --- /dev/null +++ b/SOURCES/0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch @@ -0,0 +1,62 @@ +From 9cbab9110523cfafe23d6c6b3080d0d744062b85 Mon Sep 17 00:00:00 2001 +From: Mohammed Rafi KC <rkavunga@redhat.com> +Date: Thu, 21 May 2020 16:04:33 +0530 +Subject: [PATCH 440/449] glusterd/snapshot: Improve log message during + snapshot clone + +While taking a snapshot clone, if the snapshot is not activated, +the cli was returning that the bricks are down. +This patch clearly print tha the error is due to the snapshot +state. 
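An illustrative CLI session (the command names are the stock gluster snapshot commands; the message text is the err_str set in the hunk below):

    # snapshots are deactivated on creation by default
    gluster snapshot create snap1 vol1
    gluster snapshot clone clone1 snap1
    # before this patch: a misleading complaint that bricks are down
    # after this patch:  "Snapshot snap1 is not activated"
    gluster snapshot activate snap1
    gluster snapshot clone clone1 snap1
    # clone now passes prevalidation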
+ +>Change-Id: Ia840e6e071342e061ad38bf15e2e2ff2b0dacdfa +>Fixes: #1255 +>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24478/ + +BUG: 1837926 +Change-Id: Ia840e6e071342e061ad38bf15e2e2ff2b0dacdfa +Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202707 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index c56be91..5b8ae97 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2238,7 +2238,6 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + char *clonename = NULL; + char *snapname = NULL; + char device_name[64] = ""; +- char key[PATH_MAX] = ""; + glusterd_snap_t *snap = NULL; + char err_str[PATH_MAX] = ""; + int ret = -1; +@@ -2299,8 +2298,18 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + goto out; + } + +- snprintf(key, sizeof(key) - 1, "vol1_volid"); +- ret = dict_get_bin(dict, key, (void **)&snap_volid); ++ ++ if (!glusterd_is_volume_started(snap_vol)) { ++ snprintf(err_str, sizeof(err_str), ++ "Snapshot %s is " ++ "not activated", ++ snap->snapname); ++ loglevel = GF_LOG_WARNING; ++ *op_errno = EG_VOLSTP; ++ goto out; ++ } ++ ++ ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, + "Unable to fetch snap_volid"); +-- +1.8.3.1 + diff --git a/SOURCES/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch b/SOURCES/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch new file mode 100644 index 0000000..1e49684 --- /dev/null +++ b/SOURCES/0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch @@ -0,0 +1,195 @@ +From 1bde083cbd1e06be66d00e4ca52075687cee0d60 Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Fri, 8 May 2020 23:01:04 +0200 +Subject: [PATCH 441/449] fuse: occasional logging for fuse device 'weird' + write errors + +This change is a followup to +I510158843e4b1d482bdc496c2e97b1860dc1ba93. + +In referred change we pushed log messages about 'weird' +write errors to fuse device out of sight, by reporting +them at Debug loglevel instead of Error (where +'weird' means errno is not POSIX compliant but having +meaningful semantics for FUSE protocol). + +This solved the issue of spurious error reporting. +And so far so good: these messages don't indicate +an error condition by themselves. However, when they +come in high repetitions, that indicates a suboptimal +condition which should be reported.[1] + +Therefore now we shall emit a Warning if a certain +errno occurs a certain number of times[2] as the +outcome of a write to the fuse device. + +___ +[1] typically ENOENTs and ENOTDIRs accumulate +when glusterfs' inode invalidation lags behind +the kernel's internal inode garbage collection +(in this case above errnos mean that the inode +which we requested to be invalidated is not found +in kernel). This can be mitigated with the +invalidate-limit command line / mount option, +cf. bz#1732717. + +[2] 256, as of the current implementation. 
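The 256 threshold is not a tunable: it falls out of the counter type. Each degraded errno bumps a uint8_t counter, and a warning is emitted whenever the counter wraps back to zero, i.e. once per UINT8_MAX + 1 occurrences (see ACCOUNT_ERRNO in the hunk below). A standalone demo of the wrap-detection idiom, using plain C types and no glusterfs machinery:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint8_t cnt = 0;   /* per-errno counter, as in fusedev_errno_cnt[] */
        int warnings = 0;

        for (int i = 0; i < 1000; i++) {
            if (!++cnt)    /* wrapped: 256 occurrences accumulated */
                warnings++;
        }
        /* 1000 events -> wraps at 256, 512, 768 -> prints 3 */
        printf("events=1000 warnings=%d\n", warnings);
        return 0;
    }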
+ +Upstream on https://review.gluster.org/24415 +> Change-Id: I8cc7fe104da43a88875f93b0db49d5677cc16045 +> Updates: #1000 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1839137 +Change-Id: I8448d6d328d47cb01d560cd99a2f43cd8dab312d +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202646 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 36 +++++++++++++++++++++++++++++++++++- + xlators/mount/fuse/src/fuse-bridge.h | 18 ++++++++++++++++++ + 2 files changed, 53 insertions(+), 1 deletion(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 4264fad..2e7584c 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -218,14 +218,30 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + if (res == -1) { + const char *errdesc = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; ++ gf_boolean_t errno_degraded = _gf_false; ++ gf_boolean_t errno_promoted = _gf_false; ++ ++#define ACCOUNT_ERRNO(eno) \ ++ do { \ ++ if (errno_degraded) { \ ++ pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \ ++ { \ ++ if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \ ++ errno_promoted = _gf_true; \ ++ } \ ++ pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \ ++ } \ ++ } while (0) + + /* If caller masked the errno, then it + * does not indicate an error at the application + * level, so we degrade the log severity to DEBUG. + */ + if (errnomask && errno < ERRNOMASK_MAX && +- GET_ERRNO_MASK(errnomask, errno)) ++ GET_ERRNO_MASK(errnomask, errno)) { + loglevel = GF_LOG_DEBUG; ++ errno_degraded = _gf_true; ++ } + + switch (errno) { + /* The listed errnos are FUSE status indicators, +@@ -235,33 +251,43 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + */ + case ENOENT: + errdesc = "ENOENT"; ++ ACCOUNT_ERRNO(ENOENT); + break; + case ENOTDIR: + errdesc = "ENOTDIR"; ++ ACCOUNT_ERRNO(ENOTDIR); + break; + case ENODEV: + errdesc = "ENODEV"; ++ ACCOUNT_ERRNO(ENODEV); + break; + case EPERM: + errdesc = "EPERM"; ++ ACCOUNT_ERRNO(EPERM); + break; + case ENOMEM: + errdesc = "ENOMEM"; ++ ACCOUNT_ERRNO(ENOMEM); + break; + case ENOTCONN: + errdesc = "ENOTCONN"; ++ ACCOUNT_ERRNO(ENOTCONN); + break; + case ECONNREFUSED: + errdesc = "ECONNREFUSED"; ++ ACCOUNT_ERRNO(ECONNREFUSED); + break; + case EOVERFLOW: + errdesc = "EOVERFLOW"; ++ ACCOUNT_ERRNO(EOVERFLOW); + break; + case EBUSY: + errdesc = "EBUSY"; ++ ACCOUNT_ERRNO(EBUSY); + break; + case ENOTEMPTY: + errdesc = "ENOTEMPTY"; ++ ACCOUNT_ERRNO(ENOTEMPTY); + break; + default: + errdesc = strerror(errno); +@@ -269,7 +295,13 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + + gf_log_callingfn("glusterfs-fuse", loglevel, + "writing to fuse device failed: %s", errdesc); ++ if (errno_promoted) ++ gf_log("glusterfs-fuse", GF_LOG_WARNING, ++ "writing to fuse device yielded %s %d times", errdesc, ++ UINT8_MAX + 1); + return errno; ++ ++#undef ACCOUNT_ERRNO + } + + fouh = iov_out[0].iov_base; +@@ -6584,6 +6616,8 @@ init(xlator_t *this_xl) + INIT_LIST_HEAD(&priv->interrupt_list); + pthread_mutex_init(&priv->interrupt_mutex, NULL); + ++ pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL); ++ + /* get options from option dictionary */ + ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string); + if (ret == -1 || value_string == NULL) { 
+diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h +index d2d462c..2fb15a6 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.h ++++ b/xlators/mount/fuse/src/fuse-bridge.h +@@ -78,6 +78,20 @@ typedef struct fuse_in_header fuse_in_header_t; + typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf); + ++enum fusedev_errno { ++ FUSEDEV_ENOENT, ++ FUSEDEV_ENOTDIR, ++ FUSEDEV_ENODEV, ++ FUSEDEV_EPERM, ++ FUSEDEV_ENOMEM, ++ FUSEDEV_ENOTCONN, ++ FUSEDEV_ECONNREFUSED, ++ FUSEDEV_EOVERFLOW, ++ FUSEDEV_EBUSY, ++ FUSEDEV_ENOTEMPTY, ++ FUSEDEV_EMAXPLUS ++}; ++ + struct fuse_private { + int fd; + uint32_t proto_minor; +@@ -192,6 +206,10 @@ struct fuse_private { + /* LRU Limit, if not set, default is 64k for now */ + uint32_t lru_limit; + uint32_t invalidate_limit; ++ ++ /* counters for fusdev errnos */ ++ uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS]; ++ pthread_mutex_t fusedev_errno_cnt_mutex; + }; + typedef struct fuse_private fuse_private_t; + +-- +1.8.3.1 + diff --git a/SOURCES/0442-fuse-correctly-handle-setxattr-values.patch b/SOURCES/0442-fuse-correctly-handle-setxattr-values.patch new file mode 100644 index 0000000..4be3b85 --- /dev/null +++ b/SOURCES/0442-fuse-correctly-handle-setxattr-values.patch @@ -0,0 +1,139 @@ +From 56c8ef4a64506c64aeb95d5a2c38d7107f90ac3a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 5 Feb 2019 16:57:52 +0100 +Subject: [PATCH 442/449] fuse: correctly handle setxattr values + +The setxattr function receives a pointer to raw data, which may not be +null-terminated. When this data needs to be interpreted as a string, an +explicit null termination needs to be added before using the value. + +Upstream patch https://review.gluster.org/#/c/glusterfs/+/22157 +> Change-Id: Id110f9b215b22786da5782adec9449ce38d0d563 +> updates: bz#1193929 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Note: this change is not addressing the issue of bz 1787310, +indeed it is prerequisite for other changes that do. 
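The core of the fix is a bounded copy-and-terminate step before any raw value is handed to string functions. A standalone equivalent of the helper the patch adds (gf_bin_to_string in the libglusterfs hunk below), keeping the same EINVAL-on-overflow contract:

    #include <errno.h>
    #include <string.h>

    /* Copy a raw, possibly non-NUL-terminated xattr value into a
     * bounded buffer and terminate it, so strcmp()/fnmatch() and
     * friends may be used on it safely. */
    static int
    bin_to_string(char *dst, size_t size, const void *src, size_t len)
    {
        if (len >= size)
            return EINVAL;   /* no room left for the terminator */
        memcpy(dst, src, len);
        dst[len] = '\0';
        return 0;
    }

Callers reuse a bounded stack buffer (key[1024] in the patch) and bail out on any nonzero return, which is the safe failure mode for an over-long value.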
+ +BUG: 1787310 +Change-Id: I56417b130eb2a1f388108456c905a577eb658793 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202758 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/xlator.h | 2 +- + libglusterfs/src/xlator.c | 28 +++++++++++++++++++++++++--- + xlators/mount/fuse/src/fuse-bridge.c | 20 ++++++++++++++++---- + 3 files changed, 42 insertions(+), 8 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index db04c4d..8650ccc 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -1043,7 +1043,7 @@ xlator_mem_acct_init(xlator_t *xl, int num_types); + void + xlator_mem_acct_unref(struct mem_acct *mem_acct); + int +-is_gf_log_command(xlator_t *trans, const char *name, char *value); ++is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size); + int + glusterd_check_log_level(const char *value); + int +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 6bd4f09..108b96a 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -1278,8 +1278,21 @@ xlator_destroy(xlator_t *xl) + return 0; + } + ++static int32_t ++gf_bin_to_string(char *dst, size_t size, void *src, size_t len) ++{ ++ if (len >= size) { ++ return EINVAL; ++ } ++ ++ memcpy(dst, src, len); ++ dst[len] = 0; ++ ++ return 0; ++} ++ + int +-is_gf_log_command(xlator_t *this, const char *name, char *value) ++is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size) + { + xlator_t *trav = NULL; + char key[1024] = { +@@ -1291,7 +1304,11 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) + glusterfs_ctx_t *ctx = NULL; + + if (!strcmp("trusted.glusterfs.syslog", name)) { +- ret = gf_string2boolean(value, &syslog_flag); ++ ret = gf_bin_to_string(key, sizeof(key), value, size); ++ if (ret != 0) { ++ goto out; ++ } ++ ret = gf_string2boolean(key, &syslog_flag); + if (ret) { + ret = EOPNOTSUPP; + goto out; +@@ -1307,7 +1324,12 @@ is_gf_log_command(xlator_t *this, const char *name, char *value) + if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE)) + goto out; + +- log_level = glusterd_check_log_level(value); ++ ret = gf_bin_to_string(key, sizeof(key), value, size); ++ if (ret != 0) { ++ goto out; ++ } ++ ++ log_level = glusterd_check_log_level(key); + if (log_level == -1) { + ret = EOPNOTSUPP; + goto out; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 2e7584c..cfad2b4 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4112,7 +4112,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + + /* Check if the command is for changing the log + level of process or specific xlator */ +- ret = is_gf_log_command(this, name, value); ++ ret = is_gf_log_command(this, name, value, fsi->size); + if (ret >= 0) { + op_errno = ret; + goto done; +@@ -4159,11 +4159,23 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + * fixups to make sure that's the case. To avoid nasty + * surprises, allocate an extra byte and add a NUL here. 
+ */ +- dict_value = memdup(value, fsi->size + 1); ++ dict_value = GF_MALLOC(fsi->size + 1, gf_common_mt_char); ++ if (dict_value == NULL) { ++ gf_log("glusterfs-fuse", GF_LOG_ERROR, ++ "%" PRIu64 ": SETXATTR value allocation failed", ++ finh->unique); ++ op_errno = ENOMEM; ++ goto done; ++ } ++ memcpy(dict_value, value, fsi->size); + dict_value[fsi->size] = '\0'; + } +- dict_set(state->xattr, newkey, +- data_from_dynptr((void *)dict_value, fsi->size)); ++ ret = dict_set_dynptr(state->xattr, newkey, dict_value, fsi->size); ++ if (ret < 0) { ++ op_errno = -ret; ++ GF_FREE(dict_value); ++ goto done; ++ } + + state->flags = fsi->flags; + state->name = newkey; +-- +1.8.3.1 + diff --git a/SOURCES/0443-fuse-fix-high-sev-coverity-issue.patch b/SOURCES/0443-fuse-fix-high-sev-coverity-issue.patch new file mode 100644 index 0000000..7c5e9c0 --- /dev/null +++ b/SOURCES/0443-fuse-fix-high-sev-coverity-issue.patch @@ -0,0 +1,55 @@ +From 3ac3312d63b9dc3c15cd8765ab8b7c601b007500 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Tue, 19 Mar 2019 22:51:14 +0530 +Subject: [PATCH 443/449] fuse : fix high sev coverity issue + +This patch fixed coverity issue in fuse-bridge.c. + +CID : 1398630 : Resource leak +CID : 1399757 : Uninitialized pointer read + +Upstream patch https://review.gluster.org/c/glusterfs/+/22382 +> updates: bz#789278 +> +> Change-Id: I69f8591400ee56a5d215eeac443a8e3d7777db27 +> Signed-off-by: Sunny Kumar <sunkumar@redhat.com> + +BUG: 1787310 +Change-Id: Ib2c9af25019ee57131b3d384fc4b557437e75d3e +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202759 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index cfad2b4..d17320b 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4174,6 +4174,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + if (ret < 0) { + op_errno = -ret; + GF_FREE(dict_value); ++ GF_FREE(newkey); + goto done; + } + +@@ -5963,7 +5964,12 @@ fuse_thread_proc(void *data) + ssize_t res = 0; + struct iobuf *iobuf = NULL; + fuse_in_header_t *finh = NULL; +- struct iovec iov_in[2]; ++ struct iovec iov_in[2] = { ++ { ++ 0, ++ }, ++ }; ++ + void *msg = NULL; + /* we need 512 extra buffer size for BATCH_FORGET fop. 
By tests, it is + found to be reduces 'REALLOC()' in the loop */ +-- +1.8.3.1 + diff --git a/SOURCES/0444-mount-fuse-Fixing-a-coverity-issue.patch b/SOURCES/0444-mount-fuse-Fixing-a-coverity-issue.patch new file mode 100644 index 0000000..c8e3e8c --- /dev/null +++ b/SOURCES/0444-mount-fuse-Fixing-a-coverity-issue.patch @@ -0,0 +1,40 @@ +From 53a6aed98aad73ff51f884bf815bccfa337eb524 Mon Sep 17 00:00:00 2001 +From: Barak Sason <bsasonro@redhat.com> +Date: Sun, 18 Aug 2019 17:38:09 +0300 +Subject: [PATCH 444/449] mount/fuse - Fixing a coverity issue + +Fixed resource leak of dict_value and newkey variables + +CID: 1398630 + +Upstream patch https://review.gluster.org/c/glusterfs/+/23260 +> Updates: bz#789278 +> +> Change-Id: I589fdc0aecaeb4f446cd29f95bad36ccd7a35beb +> Signed-off-by: Barak Sason <bsasonro@redhat.com> + +BUG: 1787310 +Change-Id: Id191face7b082e2e8d6e62f60b56248688d396f6 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202760 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mount/fuse/src/fuse-bridge.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index d17320b..f61fa39 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4165,6 +4165,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + "%" PRIu64 ": SETXATTR value allocation failed", + finh->unique); + op_errno = ENOMEM; ++ GF_FREE(newkey); + goto done; + } + memcpy(dict_value, value, fsi->size); +-- +1.8.3.1 + diff --git a/SOURCES/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch b/SOURCES/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch new file mode 100644 index 0000000..dea23f2 --- /dev/null +++ b/SOURCES/0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch @@ -0,0 +1,481 @@ +From dc03340654d921916ac3890d713fc84ef4bb1e28 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Sat, 29 Sep 2018 13:15:35 +0530 +Subject: [PATCH 445/449] feature/changelog: Avoid thread creation if xlator is + not enabled + +Problem: +Changelog creates threads even if the changelog is not enabled + +Background: +Changelog xlator broadly does two things + 1. Journalling - Cosumers are geo-rep and glusterfind + 2. Event Notification for registered events like (open, release etc) - + Consumers are bitrot, geo-rep + +The existing option "changelog.changelog" controls journalling and +there is no option to control event notification and is enabled by +default. So when bitrot/geo-rep is not enabled on the volume, threads +and resources(rpc and rbuf) related to event notifications consumes +resources and cpu cycle which is unnecessary. + +Solution: +The solution is to have two different options as below. + 1. changelog-notification : Event notifications + 2. changelog : Journalling + +This patch introduces the option "changelog-notification" which is +not exposed to user. When either bitrot or changelog (journalling) +is enabled, it internally enbales 'changelog-notification'. But +once the 'changelog-notification' is enabled, it will not be disabled +for the life time of the brick process even after bitrot and changelog +is disabled. As of now, rpc resource cleanup has lot of races and is +difficult to cleanup cleanly. 
If allowed, it leads to memory leaks +and crashes on enable/disable of bitrot or changelog (journal) in a +loop. Hence to be safer, the event notification is not disabled within +lifetime of process once enabled. + +> Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a +> Updates: #475 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry pick from commit 6de80bcd6366778ac34ce58ec496fa08cc02bd0b) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21896/) + +BUG: 1790336 +Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202778 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-lib/src/rpcsvc.c | 26 ++-- + tests/basic/changelog/changelog-history.t | 12 +- + tests/bugs/bitrot/bug-1227996.t | 1 - + tests/bugs/bitrot/bug-1245981.t | 4 +- + xlators/features/changelog/src/changelog-helpers.h | 4 + + .../features/changelog/src/changelog-rpc-common.c | 3 + + xlators/features/changelog/src/changelog.c | 149 +++++++++++++++------ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 13 ++ + 8 files changed, 154 insertions(+), 58 deletions(-) + +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index b058932..3f184bf 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -1865,6 +1865,18 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + goto out; + } + ++ pthread_rwlock_rdlock(&svc->rpclock); ++ { ++ list_for_each_entry(prog, &svc->programs, program) ++ { ++ if ((prog->prognum == program->prognum) && ++ (prog->progver == program->progver)) { ++ break; ++ } ++ } ++ } ++ pthread_rwlock_unlock(&svc->rpclock); ++ + ret = rpcsvc_program_unregister_portmap(program); + if (ret == -1) { + gf_log(GF_RPCSVC, GF_LOG_ERROR, +@@ -1881,17 +1893,6 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + goto out; + } + #endif +- pthread_rwlock_rdlock(&svc->rpclock); +- { +- list_for_each_entry(prog, &svc->programs, program) +- { +- if ((prog->prognum == program->prognum) && +- (prog->progver == program->progver)) { +- break; +- } +- } +- } +- pthread_rwlock_unlock(&svc->rpclock); + + gf_log(GF_RPCSVC, GF_LOG_DEBUG, + "Program unregistered: %s, Num: %d," +@@ -1912,6 +1913,9 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program) + + ret = 0; + out: ++ if (prog) ++ GF_FREE(prog); ++ + if (ret == -1) { + if (program) { + gf_log(GF_RPCSVC, GF_LOG_ERROR, +diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t +index 3ce4098..b56e247 100644 +--- a/tests/basic/changelog/changelog-history.t ++++ b/tests/basic/changelog/changelog-history.t +@@ -5,6 +5,7 @@ + + cleanup; + ++SCRIPT_TIMEOUT=300 + HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog + build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog + +@@ -68,18 +69,21 @@ TEST $CLI volume set $V0 changelog.changelog off + sleep 3 + time_after_disable=$(date '+%s') + ++TEST $CLI volume set $V0 changelog.changelog on ++sleep 5 ++ + #Passes, gives the changelogs till continuous changelogs are available + # but returns 1 +-EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2 ++EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2 + + #Fails as start falls between htime files +-EXPECT "-3" 
$HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1 ++EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1 + + #Passes as start and end falls in same htime file +-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2 ++EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2 + + #Passes, gives the changelogs till continuous changelogs are available +-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable ++EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable + + TEST rm $HISTORY_BIN_PATH/get-history + +diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t +index 47ebc42..121c7b5 100644 +--- a/tests/bugs/bitrot/bug-1227996.t ++++ b/tests/bugs/bitrot/bug-1227996.t +@@ -17,7 +17,6 @@ TEST pidof glusterd; + ## Lets create and start the volume + TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 + TEST $CLI volume start $V0 +- + ## Enable bitrot on volume $V0 + TEST $CLI volume bitrot $V0 enable + +diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t +index 2bed4d9..f395525 100644 +--- a/tests/bugs/bitrot/bug-1245981.t ++++ b/tests/bugs/bitrot/bug-1245981.t +@@ -47,9 +47,9 @@ touch $M0/5 + sleep `expr $SLEEP_TIME \* 2` + + backpath=$(get_backend_paths $fname) +-TEST getfattr -m . -n trusted.bit-rot.signature $backpath ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath + + backpath=$(get_backend_paths $M0/new_file) +-TEST getfattr -m . -n trusted.bit-rot.signature $backpath ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath + + cleanup; +diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h +index 517c4dc..3afacc9 100644 +--- a/xlators/features/changelog/src/changelog-helpers.h ++++ b/xlators/features/changelog/src/changelog-helpers.h +@@ -190,8 +190,12 @@ typedef struct changelog_ev_selector { + + /* changelog's private structure */ + struct changelog_priv { ++ /* changelog journalling */ + gf_boolean_t active; + ++ /* changelog live notifications */ ++ gf_boolean_t rpc_active; ++ + /* to generate unique socket file per brick */ + char *changelog_brick; + +diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c +index dcdcfb1..f2d1853 100644 +--- a/xlators/features/changelog/src/changelog-rpc-common.c ++++ b/xlators/features/changelog/src/changelog-rpc-common.c +@@ -263,6 +263,9 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile, + struct rpcsvc_program *prog = NULL; + rpc_transport_t *trans = NULL; + ++ if (!rpc) ++ return; ++ + while (*progs) { + prog = *progs; + (void)rpcsvc_program_unregister(rpc, prog); +diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c +index d9025f3..ff06c09 100644 +--- a/xlators/features/changelog/src/changelog.c ++++ b/xlators/features/changelog/src/changelog.c +@@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = { + }, + }; + ++static int ++changelog_init_rpc(xlator_t *this, changelog_priv_t *priv); ++ ++static int ++changelog_init(xlator_t *this, changelog_priv_t *priv); ++ + /* Entry operations - TYPE III */ + + /** +@@ -2008,6 +2014,11 @@ notify(xlator_t *this, int event, void 
*data, ...) + uint64_t clntcnt = 0; + changelog_clnt_t *conn = NULL; + gf_boolean_t cleanup_notify = _gf_false; ++ char sockfile[UNIX_PATH_MAX] = { ++ 0, ++ }; ++ rpcsvc_listener_t *listener = NULL; ++ rpcsvc_listener_t *next = NULL; + + INIT_LIST_HEAD(&queue); + +@@ -2021,23 +2032,40 @@ notify(xlator_t *this, int event, void *data, ...) + "cleanup changelog rpc connection of brick %s", + priv->victim->name); + +- this->cleanup_starting = 1; +- changelog_destroy_rpc_listner(this, priv); +- conn = &priv->connections; +- if (conn) +- changelog_ev_cleanup_connections(this, conn); +- xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); +- clntcnt = GF_ATOMIC_GET(priv->clntcnt); +- +- if (!xprtcnt && !clntcnt) { +- LOCK(&priv->lock); +- { +- cleanup_notify = priv->notify_down; +- priv->notify_down = _gf_true; ++ if (priv->rpc_active) { ++ this->cleanup_starting = 1; ++ changelog_destroy_rpc_listner(this, priv); ++ conn = &priv->connections; ++ if (conn) ++ changelog_ev_cleanup_connections(this, conn); ++ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); ++ clntcnt = GF_ATOMIC_GET(priv->clntcnt); ++ if (!xprtcnt && !clntcnt) { ++ LOCK(&priv->lock); ++ { ++ cleanup_notify = priv->notify_down; ++ priv->notify_down = _gf_true; ++ } ++ UNLOCK(&priv->lock); ++ list_for_each_entry_safe(listener, next, &priv->rpc->listeners, ++ list) ++ { ++ if (listener->trans) { ++ rpc_transport_unref(listener->trans); ++ } ++ } ++ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, ++ UNIX_PATH_MAX); ++ sys_unlink(sockfile); ++ if (priv->rpc) { ++ rpcsvc_destroy(priv->rpc); ++ priv->rpc = NULL; ++ } ++ if (!cleanup_notify) ++ default_notify(this, GF_EVENT_PARENT_DOWN, data); + } +- UNLOCK(&priv->lock); +- if (!cleanup_notify) +- default_notify(this, GF_EVENT_PARENT_DOWN, data); ++ } else { ++ default_notify(this, GF_EVENT_PARENT_DOWN, data); + } + goto out; + } +@@ -2425,6 +2453,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv) + LOCK_DESTROY(&priv->bflags.lock); + } + ++static void ++changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) ++{ ++ /* terminate rpc server */ ++ if (!this->cleanup_starting) ++ changelog_destroy_rpc_listner(this, priv); ++ ++ (void)changelog_cleanup_rpc_threads(this, priv); ++ /* cleanup rot buffs */ ++ rbuf_dtor(priv->rbuf); ++ ++ /* cleanup poller thread */ ++ if (priv->poller) ++ (void)changelog_thread_cleanup(this, priv->poller); ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -2433,6 +2477,9 @@ reconfigure(xlator_t *this, dict_t *options) + changelog_priv_t *priv = NULL; + gf_boolean_t active_earlier = _gf_true; + gf_boolean_t active_now = _gf_true; ++ gf_boolean_t rpc_active_earlier = _gf_true; ++ gf_boolean_t rpc_active_now = _gf_true; ++ gf_boolean_t iniate_rpc = _gf_false; + changelog_time_slice_t *slice = NULL; + changelog_log_data_t cld = { + 0, +@@ -2454,6 +2501,7 @@ reconfigure(xlator_t *this, dict_t *options) + + ret = -1; + active_earlier = priv->active; ++ rpc_active_earlier = priv->rpc_active; + + /* first stop the rollover and the fsync thread */ + changelog_cleanup_helper_threads(this, priv); +@@ -2487,6 +2535,29 @@ reconfigure(xlator_t *this, dict_t *options) + goto out; + + GF_OPTION_RECONF("changelog", active_now, options, bool, out); ++ GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool, ++ out); ++ ++ /* If journalling is enabled, enable rpc notifications */ ++ if (active_now && !active_earlier) { ++ if (!rpc_active_earlier) ++ iniate_rpc = _gf_true; ++ } ++ ++ if (rpc_active_now && !rpc_active_earlier) { ++ 
iniate_rpc = _gf_true; ++ } ++ ++ /* TODO: Disable of changelog-notifications is not supported for now ++ * as there is no clean way of cleaning up of rpc resources ++ */ ++ ++ if (iniate_rpc) { ++ ret = changelog_init_rpc(this, priv); ++ if (ret) ++ goto out; ++ priv->rpc_active = _gf_true; ++ } + + /** + * changelog_handle_change() handles changes that could possibly +@@ -2618,6 +2689,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv) + goto dealloc_2; + + GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2); ++ GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2); + GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2); + + GF_OPTION_INIT("op-mode", tmp, str, dealloc_2); +@@ -2656,22 +2728,6 @@ error_return: + return -1; + } + +-static void +-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) +-{ +- /* terminate rpc server */ +- if (!this->cleanup_starting) +- changelog_destroy_rpc_listner(this, priv); +- +- (void)changelog_cleanup_rpc_threads(this, priv); +- /* cleanup rot buffs */ +- rbuf_dtor(priv->rbuf); +- +- /* cleanup poller thread */ +- if (priv->poller) +- (void)changelog_thread_cleanup(this, priv->poller); +-} +- + static int + changelog_init_rpc(xlator_t *this, changelog_priv_t *priv) + { +@@ -2768,10 +2824,13 @@ init(xlator_t *this) + INIT_LIST_HEAD(&priv->queue); + priv->barrier_enabled = _gf_false; + +- /* RPC ball rolling.. */ +- ret = changelog_init_rpc(this, priv); +- if (ret) +- goto cleanup_barrier; ++ if (priv->rpc_active || priv->active) { ++ /* RPC ball rolling.. */ ++ ret = changelog_init_rpc(this, priv); ++ if (ret) ++ goto cleanup_barrier; ++ priv->rpc_active = _gf_true; ++ } + + ret = changelog_init(this, priv); + if (ret) +@@ -2783,7 +2842,9 @@ init(xlator_t *this) + return 0; + + cleanup_rpc: +- changelog_cleanup_rpc(this, priv); ++ if (priv->rpc_active) { ++ changelog_cleanup_rpc(this, priv); ++ } + cleanup_barrier: + changelog_barrier_pthread_destroy(priv); + cleanup_options: +@@ -2808,9 +2869,10 @@ fini(xlator_t *this) + priv = this->private; + + if (priv) { +- /* terminate RPC server/threads */ +- changelog_cleanup_rpc(this, priv); +- ++ if (priv->active || priv->rpc_active) { ++ /* terminate RPC server/threads */ ++ changelog_cleanup_rpc(this, priv); ++ } + /* call barrier_disable to cancel timer */ + if (priv->barrier_enabled) + __chlog_barrier_disable(this, &queue); +@@ -2879,6 +2941,13 @@ struct volume_options options[] = { + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + .tags = {"journal", "georep", "glusterfind"}}, ++ {.key = {"changelog-notification"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "off", ++ .description = "enable/disable changelog live notification", ++ .op_version = {3}, ++ .level = OPT_STATUS_BASIC, ++ .tags = {"bitrot", "georep"}}, + {.key = {"changelog-brick"}, + .type = GF_OPTION_TYPE_PATH, + .description = "brick path to generate unique socket file name." 
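The glusterd-volgen.c hunk that follows decides the new option's value while generating the brick volfile: notification is forced on when bitrot is enabled on the volume and off otherwise, independently of the journalling option. As a rough illustration (volume and subvolume names and paths here are invented, not taken from the patch), the changelog section of a generated brick volfile would then read:

    volume testvol-changelog
        type features/changelog
        option changelog-brick /bricks/brick1
        option changelog-dir /bricks/brick1/.glusterfs/changelogs
        # set by glusterd because bitrot is enabled on the volume
        option changelog-notification on
        # journalling itself can stay disabled
        option changelog off
        subvolumes testvol-posix
    end-volume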
+diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 16346e7..13f84ea 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -1876,6 +1876,19 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + ret = xlator_set_fixed_option(xl, "changelog-dir", changelog_basepath); + if (ret) + goto out; ++ ++ ret = glusterd_is_bitrot_enabled(volinfo); ++ if (ret == -1) { ++ goto out; ++ } else if (ret) { ++ ret = xlator_set_fixed_option(xl, "changelog-notification", "on"); ++ if (ret) ++ goto out; ++ } else { ++ ret = xlator_set_fixed_option(xl, "changelog-notification", "off"); ++ if (ret) ++ goto out; ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0446-bitrot-Make-number-of-signer-threads-configurable.patch b/SOURCES/0446-bitrot-Make-number-of-signer-threads-configurable.patch new file mode 100644 index 0000000..8eb2089 --- /dev/null +++ b/SOURCES/0446-bitrot-Make-number-of-signer-threads-configurable.patch @@ -0,0 +1,594 @@ +From 866a4c49ad9c5a9125814a9f843d4c7fd967ab2b Mon Sep 17 00:00:00 2001 +From: Kotresh HR <khiremat@redhat.com> +Date: Mon, 3 Feb 2020 18:10:17 +0530 +Subject: [PATCH 446/449] bitrot: Make number of signer threads configurable + +The number of signing process threads (glfs_brpobj) +is set to 4 by default. The recommendation is to set +it to number of cores available. This patch makes it +configurable as follows + +gluster vol bitrot <volname> signer-threads <count> + +> fixes: bz#1797869 +> Change-Id: Ia883b3e5e34e0bc8d095243508d320c9c9c58adc +> Signed-off-by: Kotresh HR <khiremat@redhat.com> +> (Cherry pick from commit 8fad76650bd85463708f59d2518f5b764ae4c702) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24091/) + +BUG: 1790336 +Change-Id: Ia883b3e5e34e0bc8d095243508d320c9c9c58adc +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202780 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 29 +++++++- + cli/src/cli-cmd-volume.c | 12 +++ + doc/gluster.8 | 6 ++ + libglusterfs/src/glusterfs/common-utils.h | 1 + + rpc/xdr/src/cli1-xdr.x | 1 + + tests/bitrot/br-signer-threads-config-1797869.t | 73 +++++++++++++++++++ + xlators/features/bit-rot/src/bitd/bit-rot.c | 45 +++++++++--- + xlators/features/bit-rot/src/bitd/bit-rot.h | 20 ++--- + .../bit-rot/src/stub/bit-rot-stub-mem-types.h | 1 + + xlators/mgmt/glusterd/src/glusterd-bitrot.c | 85 ++++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 16 ++-- + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 9 +++ + 12 files changed, 270 insertions(+), 28 deletions(-) + create mode 100644 tests/bitrot/br-signer-threads-config-1797869.t + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 7446b95..5fd05f4 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -5661,7 +5661,7 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + char *volname = NULL; + char *opwords[] = { + "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub", +- "signing-time", NULL}; ++ "signing-time", "signer-threads", NULL}; + char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL}; + char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly", + "monthly", "minute", NULL}; +@@ -5669,6 +5669,7 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t 
**options) + dict_t *dict = NULL; + gf_bitrot_type type = GF_BITROT_OPTION_TYPE_NONE; + int32_t expiry_time = 0; ++ int32_t signer_th_count = 0; + + GF_ASSERT(words); + GF_ASSERT(options); +@@ -5849,6 +5850,31 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + } + goto set_type; + } ++ } else if (!strcmp(words[3], "signer-threads")) { ++ if (!words[4]) { ++ cli_err( ++ "Missing signer-thread value for bitrot " ++ "option"); ++ ret = -1; ++ goto out; ++ } else { ++ type = GF_BITROT_OPTION_TYPE_SIGNER_THREADS; ++ ++ signer_th_count = strtol(words[4], NULL, 0); ++ if (signer_th_count < 1) { ++ cli_err("signer-thread count should not be less than 1"); ++ ret = -1; ++ goto out; ++ } ++ ++ ret = dict_set_uint32(dict, "signer-threads", ++ (unsigned int)signer_th_count); ++ if (ret) { ++ cli_out("Failed to set dict for bitrot"); ++ goto out; ++ } ++ goto set_type; ++ } + } else { + cli_err( + "Invalid option %s for bitrot. Please enter valid " +@@ -5857,7 +5883,6 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + ret = -1; + goto out; + } +- + set_type: + ret = dict_set_int32(dict, "type", type); + if (ret < 0) +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index f33fc99..72504ca 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3236,6 +3236,16 @@ struct cli_cmd bitrot_cmds[] = { + {"volume bitrot <VOLNAME> {enable|disable}", NULL, /*cli_cmd_bitrot_cbk,*/ + "Enable/disable bitrot for volume <VOLNAME>"}, + ++ {"volume bitrot <VOLNAME> signing-time <time-in-secs>", ++ NULL, /*cli_cmd_bitrot_cbk,*/ ++ "Waiting time for an object after last fd is closed to start signing " ++ "process"}, ++ ++ {"volume bitrot <VOLNAME> signer-threads <count>", ++ NULL, /*cli_cmd_bitrot_cbk,*/ ++ "Number of signing process threads. Usually set to number of available " ++ "cores"}, ++ + {"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}", + NULL, /*cli_cmd_bitrot_cbk,*/ + "Set the speed of the scrubber for volume <VOLNAME>"}, +@@ -3251,6 +3261,8 @@ struct cli_cmd bitrot_cmds[] = { + "the scrubber. ondemand starts the scrubber immediately."}, + + {"volume bitrot <VOLNAME> {enable|disable}\n" ++ "volume bitrot <VOLNAME> signing-time <time-in-secs>\n" ++ "volume bitrot <VOLNAME> signer-threads <count>\n" + "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n" + "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly" + "|monthly}\n" +diff --git a/doc/gluster.8 b/doc/gluster.8 +index 66bdb48..084346d 100644 +--- a/doc/gluster.8 ++++ b/doc/gluster.8 +@@ -244,6 +244,12 @@ Use "!<OPTION>" to reset option <OPTION> to default value. + \fB\ volume bitrot <VOLNAME> {enable|disable} \fR + Enable/disable bitrot for volume <VOLNAME> + .TP ++\fB\ volume bitrot <VOLNAME> signing-time <time-in-secs> \fR ++Waiting time for an object after last fd is closed to start signing process. ++.TP ++\fB\ volume bitrot <VOLNAME> signer-threads <count> \fR ++Number of signing process threads. Usually set to number of available cores. 
++.TP + \fB\ volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive} \fR + Scrub-throttle value is a measure of how fast or slow the scrubber scrubs the filesystem for volume <VOLNAME> + .TP +diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h +index 0e2ecc7..f0a0a41 100644 +--- a/libglusterfs/src/glusterfs/common-utils.h ++++ b/libglusterfs/src/glusterfs/common-utils.h +@@ -126,6 +126,7 @@ trap(void); + + /* Default value of signing waiting time to sign a file for bitrot */ + #define SIGNING_TIMEOUT "120" ++#define BR_WORKERS "4" + + /* xxhash */ + #define GF_XXH64_DIGEST_LENGTH 8 +diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x +index a32c864..777cb00 100644 +--- a/rpc/xdr/src/cli1-xdr.x ++++ b/rpc/xdr/src/cli1-xdr.x +@@ -68,6 +68,7 @@ enum gf_bitrot_type { + GF_BITROT_OPTION_TYPE_EXPIRY_TIME, + GF_BITROT_CMD_SCRUB_STATUS, + GF_BITROT_CMD_SCRUB_ONDEMAND, ++ GF_BITROT_OPTION_TYPE_SIGNER_THREADS, + GF_BITROT_OPTION_TYPE_MAX + }; + +diff --git a/tests/bitrot/br-signer-threads-config-1797869.t b/tests/bitrot/br-signer-threads-config-1797869.t +new file mode 100644 +index 0000000..657ef3e +--- /dev/null ++++ b/tests/bitrot/br-signer-threads-config-1797869.t +@@ -0,0 +1,73 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../cluster.rc ++ ++function get_bitd_count_1 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l ++} ++ ++function get_bitd_count_2 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l ++} ++ ++function get_bitd_pid_1 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | awk '{print $2}' ++} ++ ++function get_bitd_pid_2 { ++ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | awk '{print $2}' ++} ++ ++function get_signer_th_count_1 { ++ ps -eL | grep $(get_bitd_pid_1) | grep glfs_brpobj | wc -l ++} ++ ++function get_signer_th_count_2 { ++ ps -eL | grep $(get_bitd_pid_2) | grep glfs_brpobj | wc -l ++} ++ ++cleanup; ++ ++TEST launch_cluster 2 ++ ++TEST $CLI_1 peer probe $H2; ++EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count; ++ ++TEST $CLI_1 volume create $V0 $H1:$B1 ++TEST $CLI_1 volume create $V1 $H2:$B2 ++EXPECT 'Created' volinfo_field_1 $V0 'Status'; ++EXPECT 'Created' volinfo_field_1 $V1 'Status'; ++ ++TEST $CLI_1 volume start $V0 ++TEST $CLI_1 volume start $V1 ++EXPECT 'Started' volinfo_field_1 $V0 'Status'; ++EXPECT 'Started' volinfo_field_1 $V1 'Status'; ++ ++#Enable bitrot ++TEST $CLI_1 volume bitrot $V0 enable ++TEST $CLI_1 volume bitrot $V1 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2 ++ ++old_bitd_pid_1=$(get_bitd_pid_1) ++old_bitd_pid_2=$(get_bitd_pid_2) ++TEST $CLI_1 volume bitrot $V0 signer-threads 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1 ++EXPECT_NOT "$old_bitd_pid_1" get_bitd_pid_1; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2 ++EXPECT "$old_bitd_pid_2" get_bitd_pid_2; ++ ++old_bitd_pid_1=$(get_bitd_pid_1) ++old_bitd_pid_2=$(get_bitd_pid_2) ++TEST $CLI_1 volume bitrot $V1 signer-threads 2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_signer_th_count_2 ++EXPECT_NOT "$old_bitd_pid_2" get_bitd_pid_2; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1 ++EXPECT "$old_bitd_pid_1" get_bitd_pid_1; ++ ++cleanup; +diff 
--git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index 7b1c5dc..b8feef7 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -1734,22 +1734,26 @@ out: + return 0; + } + +-/** +- * Initialize signer specific structures, spawn worker threads. +- */ +- + static void + br_fini_signer(xlator_t *this, br_private_t *priv) + { + int i = 0; + +- for (; i < BR_WORKERS; i++) { ++ if (priv == NULL) ++ return; ++ ++ for (; i < priv->signer_th_count; i++) { + (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); + } ++ GF_FREE(priv->obj_queue->workers); + + pthread_cond_destroy(&priv->object_cond); + } + ++/** ++ * Initialize signer specific structures, spawn worker threads. ++ */ ++ + static int32_t + br_init_signer(xlator_t *this, br_private_t *priv) + { +@@ -1769,7 +1773,12 @@ br_init_signer(xlator_t *this, br_private_t *priv) + goto cleanup_cond; + INIT_LIST_HEAD(&priv->obj_queue->objects); + +- for (i = 0; i < BR_WORKERS; i++) { ++ priv->obj_queue->workers = GF_CALLOC( ++ priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t); ++ if (!priv->obj_queue->workers) ++ goto cleanup_obj_queue; ++ ++ for (i = 0; i < priv->signer_th_count; i++) { + ret = gf_thread_create(&priv->obj_queue->workers[i], NULL, + br_process_object, this, "brpobj"); + if (ret != 0) { +@@ -1787,7 +1796,9 @@ cleanup_threads: + for (i--; i >= 0; i--) { + (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]); + } ++ GF_FREE(priv->obj_queue->workers); + ++cleanup_obj_queue: + GF_FREE(priv->obj_queue); + + cleanup_cond: +@@ -1840,7 +1851,7 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) + if (contribution == 0) + contribution = 1; + spec.rate = BR_HASH_CALC_READ_SIZE * contribution; +- spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; ++ spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE; + + #endif + +@@ -1860,11 +1871,16 @@ br_rate_limit_signer(xlator_t *this, int child_count, int numbricks) + static int32_t + br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options) + { +- if (options) ++ if (options) { + GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32, + error_return); +- else ++ GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options, ++ uint32, error_return); ++ } else { + GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); ++ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, ++ error_return); ++ } + + return 0; + +@@ -1880,6 +1896,8 @@ br_signer_init(xlator_t *this, br_private_t *priv) + + GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return); + GF_OPTION_INIT("brick-count", numbricks, int32, error_return); ++ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32, ++ error_return); + + ret = br_rate_limit_signer(this, priv->child_count, numbricks); + if (ret) +@@ -2210,6 +2228,15 @@ struct volume_options options[] = { + .description = "Pause/Resume scrub. Upon resume, scrubber " + "continues from where it left off.", + }, ++ { ++ .key = {"signer-threads"}, ++ .type = GF_OPTION_TYPE_INT, ++ .default_value = BR_WORKERS, ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_SETTABLE, ++ .description = "Number of signing process threads. 
As a best " ++ "practice, set this to the number of processor cores", ++ }, + {.key = {NULL}}, + }; + +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h +index a4d4fd7..8ac7dcd 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.h ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.h +@@ -30,12 +30,6 @@ + + #include <openssl/sha.h> + +-/** +- * TODO: make this configurable. As a best practice, set this to the +- * number of processor cores. +- */ +-#define BR_WORKERS 4 +- + typedef enum scrub_throttle { + BR_SCRUB_THROTTLE_VOID = -1, + BR_SCRUB_THROTTLE_LAZY = 0, +@@ -108,12 +102,12 @@ struct br_child { + typedef struct br_child br_child_t; + + struct br_obj_n_workers { +- struct list_head objects; /* queue of objects expired from the +- timer wheel and ready to be picked +- up for signing */ +- pthread_t workers[BR_WORKERS]; /* Threads which pick up the objects +- from the above queue and start +- signing each object */ ++ struct list_head objects; /* queue of objects expired from the ++ timer wheel and ready to be picked ++ up for signing */ ++ pthread_t *workers; /* Threads which pick up the objects ++ from the above queue and start ++ signing each object */ + }; + + struct br_scrubber { +@@ -209,6 +203,8 @@ struct br_private { + + uint32_t expiry_time; /* objects "wait" time */ + ++ uint32_t signer_th_count; /* Number of signing process threads */ ++ + tbf_t *tbf; /* token bucket filter */ + + gf_boolean_t iamscrubber; /* function as a fs scrubber */ +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +index 40bcda1..9d93caf 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +@@ -29,6 +29,7 @@ enum br_mem_types { + gf_br_stub_mt_sigstub_t, + gf_br_mt_br_child_event_t, + gf_br_stub_mt_misc, ++ gf_br_mt_br_worker_t, + gf_br_stub_mt_end, + }; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c +index c653249..f79af2d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-bitrot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c +@@ -34,6 +34,7 @@ const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = { + [GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency", + [GF_BITROT_OPTION_TYPE_SCRUB] = "scrub", + [GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time", ++ [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads", + }; + + int +@@ -354,6 +355,81 @@ out: + return ret; + } + ++static gf_boolean_t ++is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo) ++{ ++ gf_boolean_t noop = _gf_true; ++ glusterd_brickinfo_t *brickinfo = NULL; ++ ++ if (!glusterd_is_bitrot_enabled(volinfo)) ++ goto out; ++ else if (volinfo->status != GLUSTERD_STATUS_STARTED) ++ goto out; ++ else { ++ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list) ++ { ++ if (!glusterd_is_local_brick(this, volinfo, brickinfo)) ++ continue; ++ noop = _gf_false; ++ return noop; ++ } ++ } ++out: ++ return noop; ++} ++ ++static int ++glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict, ++ char *key, char **op_errstr) ++{ ++ int32_t ret = -1; ++ uint32_t signer_th_count = 0; ++ uint32_t existing_th_count = 0; ++ xlator_t *this = NULL; ++ glusterd_conf_t *priv = NULL; ++ char dkey[32] = { ++ 0, ++ }; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ priv = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ ++ 
ret = dict_get_uint32(dict, "signer-threads", &signer_th_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Unable to get bitrot signer thread count."); ++ goto out; ++ } ++ ++ ret = dict_get_uint32(volinfo->dict, key, &existing_th_count); ++ if (ret == 0 && signer_th_count == existing_th_count) { ++ goto out; ++ } ++ ++ snprintf(dkey, sizeof(dkey), "%d", signer_th_count); ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option %s", key); ++ goto out; ++ } ++ ++ if (!is_bitd_configure_noop(this, volinfo)) { ++ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, ++ PROC_START_NO_WAIT); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL, ++ "Failed to reconfigure bitrot services"); ++ goto out; ++ } ++ } ++out: ++ return ret; ++} ++ + static int + glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char **op_errstr) + { +@@ -594,6 +670,15 @@ glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + volinfo, dict, "features.expiry-time", op_errstr); + if (ret) + goto out; ++ break; ++ ++ case GF_BITROT_OPTION_TYPE_SIGNER_THREADS: ++ ret = glusterd_bitrot_signer_threads( ++ volinfo, dict, "features.signer-threads", op_errstr); ++ if (ret) ++ goto out; ++ break; ++ + case GF_BITROT_CMD_SCRUB_STATUS: + case GF_BITROT_CMD_SCRUB_ONDEMAND: + break; +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 13f84ea..094a71f 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -4658,6 +4658,12 @@ bitrot_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme, + return -1; + } + ++ if (!strcmp(vme->option, "signer-threads")) { ++ ret = xlator_set_fixed_option(xl, "signer-threads", vme->value); ++ if (ret) ++ return -1; ++ } ++ + return ret; + } + +@@ -4940,18 +4946,18 @@ glusterd_prepare_shd_volume_options_for_tier(glusterd_volinfo_t *volinfo, + dict_t *set_dict) + { + int ret = -1; +- char *key = NULL; ++ char *key = NULL; + +- key = volgen_get_shd_key (volinfo->tier_info.cold_type); ++ key = volgen_get_shd_key(volinfo->tier_info.cold_type); + if (key) { +- ret = dict_set_str (set_dict, key, "enable"); ++ ret = dict_set_str(set_dict, key, "enable"); + if (ret) + goto out; + } + +- key = volgen_get_shd_key (volinfo->tier_info.hot_type); ++ key = volgen_get_shd_key(volinfo->tier_info.hot_type); + if (key) { +- ret = dict_set_str (set_dict, key, "enable"); ++ ret = dict_set_str(set_dict, key, "enable"); + if (ret) + goto out; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 9001b88..62acadf 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3379,6 +3379,15 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .op_version = GD_OP_VERSION_3_7_0, + .type = NO_DOC, + }, ++ { ++ .key = "features.signer-threads", ++ .voltype = "features/bit-rot", ++ .value = BR_WORKERS, ++ .option = "signer-threads", ++ .op_version = GD_OP_VERSION_7_0, ++ .type = NO_DOC, ++ }, ++ /* Upcall translator options */ + /* Upcall translator options */ + { + .key = "features.cache-invalidation", +-- +1.8.3.1 + diff --git a/SOURCES/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch b/SOURCES/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch new file 
mode 100644 index 0000000..a39b61b --- /dev/null +++ b/SOURCES/0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch @@ -0,0 +1,359 @@ +From 51090a4b3cb000d601083f12d1875547819fc03f Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Wed, 4 Mar 2020 09:17:26 +0530 +Subject: [PATCH 447/449] core[brick_mux]: brick crashed when creating and + deleting volumes over time + +Problem: In a brick_mux environment, while volumes are created/stopped in a loop + after running for a long time, the main brick process crashes. The crash + happens because the main brick process did not clean up memory for all + objects at the time of detaching a volume. + Below are the objects that are missed at the time of detaching a volume + 1) xlator object for a brick graph + 2) local_pool for posix_lock xlator + 3) rpc object cleanup at quota xlator + 4) inode leak at brick xlator + +Solution: To avoid the crash, resolve all leaks at the time of detaching a brick +> Change-Id: Ibb6e46c5fba22b9441a88cbaf6b3278823235913 +> updates: #977 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit e589d8de66d3325da8fbbbe44d1a5bd6335e08ab) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24209/) + +BUG: 1790336 +Change-Id: Ibb6e46c5fba22b9441a88cbaf6b3278823235913 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202782 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + libglusterfs/src/glusterfs/glusterfs.h | 1 + + libglusterfs/src/graph.c | 1 + + libglusterfs/src/graph.y | 2 +- + libglusterfs/src/xlator.c | 29 ++++++++---- + xlators/features/changelog/src/changelog.c | 1 + + xlators/features/locks/src/posix.c | 4 ++ + xlators/features/quota/src/quota-enforcer-client.c | 14 +++++- + xlators/features/quota/src/quota.c | 54 ++++++++++++++++++++-- + xlators/features/quota/src/quota.h | 3 ++ + xlators/protocol/server/src/server.c | 12 +++-- + 10 files changed, 103 insertions(+), 18 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 177a020..584846e 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -603,6 +603,7 @@ struct _glusterfs_graph { + int used; /* Should be set when fuse gets + first CHILD_UP */ + uint32_t volfile_checksum; ++ pthread_mutex_t mutex; + }; + typedef struct _glusterfs_graph glusterfs_graph_t; + +diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c +index bb5e67a..1cd92db 100644 +--- a/libglusterfs/src/graph.c ++++ b/libglusterfs/src/graph.c +@@ -1092,6 +1092,7 @@ glusterfs_graph_destroy_residual(glusterfs_graph_t *graph) + ret = xlator_tree_free_memacct(graph->first); + + list_del_init(&graph->list); ++ pthread_mutex_destroy(&graph->mutex); + GF_FREE(graph); + + return ret; +diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y +index 5b92985..5733515 100644 +--- a/libglusterfs/src/graph.y ++++ b/libglusterfs/src/graph.y +@@ -541,7 +541,7 @@ glusterfs_graph_new () + return NULL; + + INIT_LIST_HEAD (&graph->list); +- ++ pthread_mutex_init(&graph->mutex, NULL); + gettimeofday (&graph->dob, NULL); + + return graph; +diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c +index 108b96a..36cc32c 100644 +--- a/libglusterfs/src/xlator.c ++++ b/libglusterfs/src/xlator.c +@@ -938,6 +938,8 @@ xlator_mem_cleanup(xlator_t *this) + xlator_list_t **trav_p = NULL; + xlator_t *top = 
NULL; + xlator_t *victim = NULL; ++ glusterfs_graph_t *graph = NULL; ++ gf_boolean_t graph_cleanup = _gf_false; + + if (this->call_cleanup || !this->ctx) + return; +@@ -945,6 +947,12 @@ xlator_mem_cleanup(xlator_t *this) + this->call_cleanup = 1; + ctx = this->ctx; + ++ inode_table = this->itable; ++ if (inode_table) { ++ inode_table_destroy(inode_table); ++ this->itable = NULL; ++ } ++ + xlator_call_fini(trav); + + while (prev) { +@@ -953,12 +961,6 @@ xlator_mem_cleanup(xlator_t *this) + prev = trav; + } + +- inode_table = this->itable; +- if (inode_table) { +- inode_table_destroy(inode_table); +- this->itable = NULL; +- } +- + if (this->fini) { + this->fini(this); + } +@@ -968,17 +970,28 @@ xlator_mem_cleanup(xlator_t *this) + if (ctx->active) { + top = ctx->active->first; + LOCK(&ctx->volfile_lock); +- /* TODO here we have leak for xlator node in a graph */ +- /* Need to move only top xlator from a graph */ + for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { + victim = (*trav_p)->xlator; + if (victim->call_cleanup && !strcmp(victim->name, this->name)) { ++ graph_cleanup = _gf_true; + (*trav_p) = (*trav_p)->next; + break; + } + } + UNLOCK(&ctx->volfile_lock); + } ++ ++ if (graph_cleanup) { ++ prev = this; ++ graph = ctx->active; ++ pthread_mutex_lock(&graph->mutex); ++ while (prev) { ++ trav = prev->next; ++ GF_FREE(prev); ++ prev = trav; ++ } ++ pthread_mutex_unlock(&graph->mutex); ++ } + } + + void +diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c +index ff06c09..b54112c 100644 +--- a/xlators/features/changelog/src/changelog.c ++++ b/xlators/features/changelog/src/changelog.c +@@ -2872,6 +2872,7 @@ fini(xlator_t *this) + if (priv->active || priv->rpc_active) { + /* terminate RPC server/threads */ + changelog_cleanup_rpc(this, priv); ++ GF_FREE(priv->ev_dispatcher); + } + /* call barrier_disable to cancel timer */ + if (priv->barrier_enabled) +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 9a14c64..50f1265 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -4102,6 +4102,10 @@ fini(xlator_t *this) + if (!priv) + return; + this->private = NULL; ++ if (this->local_pool) { ++ mem_pool_destroy(this->local_pool); ++ this->local_pool = NULL; ++ } + GF_FREE(priv->brickname); + GF_FREE(priv); + +diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c +index 1a4c2e3..097439d 100644 +--- a/xlators/features/quota/src/quota-enforcer-client.c ++++ b/xlators/features/quota/src/quota-enforcer-client.c +@@ -362,16 +362,28 @@ quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata, + { + xlator_t *this = NULL; + int ret = 0; ++ quota_priv_t *priv = NULL; + + this = mydata; +- ++ priv = this->private; + switch (event) { + case RPC_CLNT_CONNECT: { ++ pthread_mutex_lock(&priv->conn_mutex); ++ { ++ priv->conn_status = _gf_true; ++ } ++ pthread_mutex_unlock(&priv->conn_mutex); + gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT"); + break; + } + + case RPC_CLNT_DISCONNECT: { ++ pthread_mutex_lock(&priv->conn_mutex); ++ { ++ priv->conn_status = _gf_false; ++ pthread_cond_signal(&priv->conn_cond); ++ } ++ pthread_mutex_unlock(&priv->conn_mutex); + gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT"); + break; + } +diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c +index a0c236d..d1123ce 100644 +--- a/xlators/features/quota/src/quota.c ++++ 
b/xlators/features/quota/src/quota.c +@@ -5014,6 +5014,43 @@ quota_forget(xlator_t *this, inode_t *inode) + return 0; + } + ++int ++notify(xlator_t *this, int event, void *data, ...) ++{ ++ quota_priv_t *priv = NULL; ++ int ret = 0; ++ rpc_clnt_t *rpc = NULL; ++ gf_boolean_t conn_status = _gf_true; ++ xlator_t *victim = data; ++ ++ priv = this->private; ++ if (!priv || !priv->is_quota_on) ++ goto out; ++ ++ if (event == GF_EVENT_PARENT_DOWN) { ++ rpc = priv->rpc_clnt; ++ if (rpc) { ++ rpc_clnt_disable(rpc); ++ pthread_mutex_lock(&priv->conn_mutex); ++ { ++ conn_status = priv->conn_status; ++ while (conn_status) { ++ (void)pthread_cond_wait(&priv->conn_cond, ++ &priv->conn_mutex); ++ conn_status = priv->conn_status; ++ } ++ } ++ pthread_mutex_unlock(&priv->conn_mutex); ++ gf_log(this->name, GF_LOG_INFO, ++ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name); ++ } ++ } ++ ++out: ++ ret = default_notify(this, event, data); ++ return ret; ++} ++ + int32_t + init(xlator_t *this) + { +@@ -5056,6 +5093,10 @@ init(xlator_t *this) + goto err; + } + ++ pthread_mutex_init(&priv->conn_mutex, NULL); ++ pthread_cond_init(&priv->conn_cond, NULL); ++ priv->conn_status = _gf_false; ++ + if (priv->is_quota_on) { + rpc = quota_enforcer_init(this, this->options); + if (rpc == NULL) { +@@ -5169,20 +5210,22 @@ fini(xlator_t *this) + { + quota_priv_t *priv = NULL; + rpc_clnt_t *rpc = NULL; +- int i = 0, cnt = 0; + + priv = this->private; + if (!priv) + return; + rpc = priv->rpc_clnt; + priv->rpc_clnt = NULL; +- this->private = NULL; + if (rpc) { +- cnt = GF_ATOMIC_GET(rpc->refcount); +- for (i = 0; i < cnt; i++) +- rpc_clnt_unref(rpc); ++ rpc_clnt_connection_cleanup(&rpc->conn); ++ rpc_clnt_unref(rpc); + } ++ ++ this->private = NULL; + LOCK_DESTROY(&priv->lock); ++ pthread_mutex_destroy(&priv->conn_mutex); ++ pthread_cond_destroy(&priv->conn_cond); ++ + GF_FREE(priv); + if (this->local_pool) { + mem_pool_destroy(this->local_pool); +@@ -5314,6 +5357,7 @@ struct volume_options options[] = { + xlator_api_t xlator_api = { + .init = init, + .fini = fini, ++ .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ +diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h +index a5a99ca..e51ffd4 100644 +--- a/xlators/features/quota/src/quota.h ++++ b/xlators/features/quota/src/quota.h +@@ -217,6 +217,9 @@ struct quota_priv { + char *volume_uuid; + uint64_t validation_count; + int32_t quotad_conn_status; ++ pthread_mutex_t conn_mutex; ++ pthread_cond_t conn_cond; ++ gf_boolean_t conn_status; + }; + typedef struct quota_priv quota_priv_t; + +diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c +index a5f09fe..54d9c0f 100644 +--- a/xlators/protocol/server/src/server.c ++++ b/xlators/protocol/server/src/server.c +@@ -409,7 +409,13 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) + + arg = calloc(1, sizeof(*arg)); + arg->this = this; +- arg->victim_name = gf_strdup(victim_name); ++ arg->victim_name = strdup(victim_name); ++ if (!arg->victim_name) { ++ gf_smsg(this->name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY, ++ "Memory allocation is failed"); ++ return; ++ } ++ + th_ret = gf_thread_create_detached(&th_id, server_graph_janitor_threads, + arg, "graphjanitor"); + if (th_ret) { +@@ -417,7 +423,7 @@ server_call_xlator_mem_cleanup(xlator_t *this, char *victim_name) + "graph janitor Thread" + " creation is failed for brick %s", + victim_name); +- 
GF_FREE(arg->victim_name); ++ free(arg->victim_name); + free(arg); + } + } +@@ -628,7 +634,7 @@ server_graph_janitor_threads(void *data) + } + + out: +- GF_FREE(arg->victim_name); ++ free(arg->victim_name); + free(arg); + return NULL; + } +-- +1.8.3.1 + diff --git a/SOURCES/0448-Posix-Use-simple-approach-to-close-fd.patch b/SOURCES/0448-Posix-Use-simple-approach-to-close-fd.patch new file mode 100644 index 0000000..f030358 --- /dev/null +++ b/SOURCES/0448-Posix-Use-simple-approach-to-close-fd.patch @@ -0,0 +1,341 @@ +From 175c99dccc47d2b4267a8819404e5cbeb8cfba11 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Thu, 12 Mar 2020 21:12:13 +0530 +Subject: [PATCH 448/449] Posix: Use simple approach to close fd + +Problem: posix_release(dir) functions add the fds into ctx->janitor_fds, + and a janitor thread closes the fds. In a brick_mux environment it is + difficult to handle race conditions in the janitor thread because a brick + process spawns a single janitor thread for all bricks. + +Solution: Use a synctask to execute posix_release(dir) functions instead of + using a background thread to close fds. + +> Credits: Pranith Karampuri <pkarampu@redhat.com> +> Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e +> Fixes: bz#1811631 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry pick from commit fb20713b380e1df8d7f9e9df96563be2f9144fd6) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24221/) + +BUG: 1790336 +Change-Id: Iffb031f0695a7da83d5a2f6bac8863dad225317e +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202791 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/glusterfs.h | 6 +- + libglusterfs/src/glusterfs/syncop.h | 7 +- + rpc/rpc-lib/src/rpcsvc.c | 6 ++ + run-tests.sh | 2 +- + tests/features/ssl-authz.t | 7 +- + xlators/storage/posix/src/posix-common.c | 4 -- + xlators/storage/posix/src/posix-helpers.c | 98 -------------------------- + xlators/storage/posix/src/posix-inode-fd-ops.c | 28 ++------ + xlators/storage/posix/src/posix.h | 3 - + 9 files changed, 20 insertions(+), 141 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 584846e..495a4d7 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -734,11 +734,7 @@ struct _glusterfs_ctx { + + struct list_head volfile_list; + +- /* Add members to manage janitor threads for cleanup fd */ +- struct list_head janitor_fds; +- pthread_cond_t janitor_cond; +- pthread_mutex_t janitor_lock; +- pthread_t janitor; ++ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; + typedef struct _glusterfs_ctx glusterfs_ctx_t; + +diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h +index 3011b4c..1e4c73b 100644 +--- a/libglusterfs/src/glusterfs/syncop.h ++++ b/libglusterfs/src/glusterfs/syncop.h +@@ -254,7 +254,7 @@ struct syncopctx { + task = synctask_get(); \ + stb->task = task; \ + if (task) \ +- frame = task->opframe; \ ++ frame = copy_frame(task->opframe); \ + else \ + frame = syncop_create_frame(THIS); \ + \ +@@ -269,10 +269,7 @@ struct syncopctx { + STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \ + \ + __yield(stb); \ +- if (task) \ +- STACK_RESET(frame->root); \ +- else \ +- STACK_DESTROY(frame->root); \ ++ STACK_DESTROY(frame->root); \ + } while (0) 
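The race described in the problem statement above is easiest to picture next to a reproducer. The sketch below is illustrative only and is not part of the patch series; the volume names, brick paths, and loop count are hypothetical, and it assumes a running glusterd on the local host with brick multiplexing available.

#!/bin/bash
# Hedged reproducer sketch for the brick-mux cleanup races described in
# patches 0447/0448 -- not part of the patch series. Volume names and
# brick paths are hypothetical; assumes a running glusterd on this host.
HOST=$(hostname)

# Multiplex all bricks into a single brick process so that detach-time
# cleanup paths are shared across volumes.
gluster --mode=script volume set all cluster.brick-multiplex on

# Churn volumes: every stop/delete detaches a brick from the shared
# process and exercises the release/cleanup paths this series touches.
for i in $(seq 1 50); do
    gluster --mode=script volume create test_$i ${HOST}:/bricks/test_$i force
    gluster --mode=script volume start test_$i
    gluster --mode=script volume stop test_$i
    gluster --mode=script volume delete test_$i
done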
+ + /* +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 3f184bf..23ca1fd 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -375,6 +375,12 @@ rpcsvc_program_actor(rpcsvc_request_t *req) + + req->ownthread = program->ownthread; + req->synctask = program->synctask; ++ if (((req->procnum == GFS3_OP_RELEASE) || ++ (req->procnum == GFS3_OP_RELEASEDIR)) && ++ (program->prognum == GLUSTER_FOP_PROGRAM)) { ++ req->ownthread = _gf_false; ++ req->synctask = _gf_true; ++ } + + err = SUCCESS; + gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s", +diff --git a/run-tests.sh b/run-tests.sh +index 5683b21..c835d93 100755 +--- a/run-tests.sh ++++ b/run-tests.sh +@@ -356,7 +356,7 @@ function run_tests() + selected_tests=$((selected_tests+1)) + echo + echo $section_separator$section_separator +- if [[ $(get_test_status $t) == "BAD_TEST" ]] && \ ++ if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \ + [[ $skip_bad_tests == "yes" ]] + then + skipped_bad_tests=$((skipped_bad_tests+1)) +diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t +index 132b598..497083e 100755 +--- a/tests/features/ssl-authz.t ++++ b/tests/features/ssl-authz.t +@@ -67,13 +67,14 @@ echo "Memory consumption for glusterfsd process" + for i in $(seq 1 100); do + gluster v heal $V0 info >/dev/null + done +- ++#Wait to cleanup memory ++sleep 10 + end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'` + diff=$((end-start)) + +-# If memory consumption is more than 5M some leak in SSL code path ++# If memory consumption is more than 15M some leak in SSL code path + +-TEST [ $diff -lt 5000 ] ++TEST [ $diff -lt 15000 ] + + + # Set ssl-allow to a wildcard that includes our identity. +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index 2cb58ba..ac53796 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -1041,10 +1041,6 @@ posix_init(xlator_t *this) + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); +- ret = posix_spawn_ctx_janitor_thread(this); +- if (ret) +- goto out; +- + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 2336add..39dbcce 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1582,104 +1582,6 @@ unlock: + return; + } + +-static struct posix_fd * +-janitor_get_next_fd(glusterfs_ctx_t *ctx, int32_t janitor_sleep) +-{ +- struct posix_fd *pfd = NULL; +- +- struct timespec timeout; +- +- pthread_mutex_lock(&ctx->janitor_lock); +- { +- if (list_empty(&ctx->janitor_fds)) { +- time(&timeout.tv_sec); +- timeout.tv_sec += janitor_sleep; +- timeout.tv_nsec = 0; +- +- pthread_cond_timedwait(&ctx->janitor_cond, &ctx->janitor_lock, +- &timeout); +- goto unlock; +- } +- +- pfd = list_entry(ctx->janitor_fds.next, struct posix_fd, list); +- +- list_del(ctx->janitor_fds.next); +- } +-unlock: +- pthread_mutex_unlock(&ctx->janitor_lock); +- +- return pfd; +-} +- +-static void * +-posix_ctx_janitor_thread_proc(void *data) +-{ +- xlator_t *this = NULL; +- struct posix_fd *pfd; +- glusterfs_ctx_t *ctx = NULL; +- struct posix_private *priv = NULL; +- int32_t sleep_duration = 0; +- +- this = data; +- ctx = THIS->ctx; +- THIS = this; +- +- priv = this->private; +- 
sleep_duration = priv->janitor_sleep_duration; +- while (1) { +- pfd = janitor_get_next_fd(ctx, sleep_duration); +- if (pfd) { +- if (pfd->dir == NULL) { +- gf_msg_trace(this->name, 0, "janitor: closing file fd=%d", +- pfd->fd); +- sys_close(pfd->fd); +- } else { +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", +- pfd->dir); +- sys_closedir(pfd->dir); +- } +- +- GF_FREE(pfd); +- } +- } +- +- return NULL; +-} +- +-int +-posix_spawn_ctx_janitor_thread(xlator_t *this) +-{ +- struct posix_private *priv = NULL; +- int ret = 0; +- glusterfs_ctx_t *ctx = NULL; +- +- priv = this->private; +- ctx = THIS->ctx; +- +- LOCK(&priv->lock); +- { +- if (!ctx->janitor) { +- pthread_mutex_init(&ctx->janitor_lock, NULL); +- pthread_cond_init(&ctx->janitor_cond, NULL); +- INIT_LIST_HEAD(&ctx->janitor_fds); +- +- ret = gf_thread_create(&ctx->janitor, NULL, +- posix_ctx_janitor_thread_proc, this, +- "posixctxjan"); +- +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, +- "spawning janitor " +- "thread failed"); +- goto unlock; +- } +- } +- } +-unlock: +- UNLOCK(&priv->lock); +- return ret; +-} +- + static int + is_fresh_file(int64_t ctime_sec) + { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 5748b9f..d135d8b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1358,7 +1358,6 @@ posix_releasedir(xlator_t *this, fd_t *fd) + struct posix_fd *pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; +- glusterfs_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); +@@ -1376,21 +1375,11 @@ posix_releasedir(xlator_t *this, fd_t *fd) + goto out; + } + +- ctx = THIS->ctx; +- +- pthread_mutex_lock(&ctx->janitor_lock); +- { +- INIT_LIST_HEAD(&pfd->list); +- list_add_tail(&pfd->list, &ctx->janitor_fds); +- pthread_cond_signal(&ctx->janitor_cond); +- } +- pthread_mutex_unlock(&ctx->janitor_lock); +- +- /*gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); + + sys_closedir(pfd->dir); + GF_FREE(pfd); +- */ ++ + out: + return 0; + } +@@ -2510,13 +2499,11 @@ posix_release(xlator_t *this, fd_t *fd) + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; +- glusterfs_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; +- ctx = THIS->ctx; + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { +@@ -2531,13 +2518,10 @@ posix_release(xlator_t *this, fd_t *fd) + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + +- pthread_mutex_lock(&ctx->janitor_lock); +- { +- INIT_LIST_HEAD(&pfd->list); +- list_add_tail(&pfd->list, &ctx->janitor_fds); +- pthread_cond_signal(&ctx->janitor_cond); +- } +- pthread_mutex_unlock(&ctx->janitor_lock); ++ gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ ++ sys_close(pfd->fd); ++ GF_FREE(pfd); + + if (!priv) + goto out; +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index ac9d83c..61495a7 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -666,9 +666,6 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + int + posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + +-int +-posix_spawn_ctx_janitor_thread(xlator_t *this); +- + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, 
dict_t *xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch b/SOURCES/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch new file mode 100644 index 0000000..6a161bf --- /dev/null +++ b/SOURCES/0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch @@ -0,0 +1,107 @@ +From 6e15fca1621b06270983f57ac146f0f8e52f0797 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Tue, 9 Jun 2020 15:38:12 +0530 +Subject: [PATCH 449/449] test: Test case brick-mux-validation-in-cluster.t is + failing on RHEL-8 + +Brick processes are not properly attached on any cluster node when +some volume options are changed on a peer node while glusterd is down on +that specific node. + +Solution: At the time of restart, glusterd gets a friend update request +from a peer node if the peer has some changes for a volume. If the brick +process is started before the friend update request is received, the +brick_mux behavior does not work properly: all bricks are attached to +the same process even though the volume options are not the same. To +avoid the issue, introduce an atomic flag volpeerupdate and update its +value when glusterd receives a friend update request from a peer for a +specific volume. If the volpeerupdate flag is 1, the volume is started +by the glusterd_import_friend_volume synctask + +> Change-Id: I4c026f1e7807ded249153670e6967a2be8d22cb7 +> Credit: Sanju Rakonde <srakonde@redhat.com> +> fixes: #1290 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24540/) +> (Cherry pick from commit 955bfd567329cf7fe63e9c3b89d333a55e5e9a20) + +BUG: 1844359 +Change-Id: I4c026f1e7807ded249153670e6967a2be8d22cb7 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202812 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sanju Rakonde <srakonde@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/brick-mux-validation-in-cluster.t | 4 +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 7 +++++-- + xlators/mgmt/glusterd/src/glusterd.h | 4 ++++ + 3 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +index f088dbb..b6af487 100644 +--- a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t ++++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t +@@ -100,10 +100,8 @@ $CLI_2 volume set $V0 performance.readdir-ahead on + $CLI_2 volume set $V1 performance.readdir-ahead on + + TEST $glusterd_1; ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +-sleep 10 +- +-EXPECT 4 count_brick_processes + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids + EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 2eb2a76..6f904ae 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3758,6 +3758,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + "Version of volume %s differ. 
local version = %d, " + "remote version = %d on peer %s", + volinfo->volname, volinfo->version, version, hostname); ++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (version < volinfo->version) { +@@ -4784,7 +4785,8 @@ glusterd_volinfo_stop_stale_bricks(glusterd_volinfo_t *new_volinfo, + * or if it's part of the new volume and is pending a snap, + * then stop the brick process + */ +- if (ret || (new_brickinfo->snap_status == -1)) { ++ if (ret || (new_brickinfo->snap_status == -1) || ++ GF_ATOMIC_GET(old_volinfo->volpeerupdate)) { + /*TODO: may need to switch to 'atomic' flavour of + * brick_stop, once we make peer rpc program also + * synctask enabled*/ +@@ -6490,7 +6492,8 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo, + * three different triggers for an attempt to start the brick process + * due to the quorum handling code in glusterd_friend_sm. + */ +- if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered) { ++ if (brickinfo->status == GF_BRICK_STARTING || brickinfo->start_triggered || ++ GF_ATOMIC_GET(volinfo->volpeerupdate)) { + gf_msg_debug(this->name, 0, + "brick %s is already in starting " + "phase", +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index 1c6c3b1..f739b5d 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -523,6 +523,10 @@ struct glusterd_volinfo_ { + pthread_mutex_t store_volinfo_lock; /* acquire lock for + * updating the volinfo + */ ++ gf_atomic_t volpeerupdate; ++ /* Flag to check about volume has received updates ++ from peer ++ */ + }; + + typedef enum gd_snap_status_ { +-- +1.8.3.1 + diff --git a/SOURCES/0450-tests-basic-ctime-enable-ctime-before-testing.patch b/SOURCES/0450-tests-basic-ctime-enable-ctime-before-testing.patch new file mode 100644 index 0000000..96de5a1 --- /dev/null +++ b/SOURCES/0450-tests-basic-ctime-enable-ctime-before-testing.patch @@ -0,0 +1,35 @@ +From 09dce9ce8e946a86209b6f057bf14323036fa12a Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Wed, 10 Jun 2020 11:44:56 +0530 +Subject: [PATCH 450/451] tests/basic/ctime: enable ctime before testing + +This is to ensure that this test successfully runs, even if +ctime is disabled by default (which is the case in downstream.) 
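As a rough illustration of the one-line test change below (not part of the patch; "myvol" is a hypothetical volume name): on builds where ctime is off by default, a test that asserts timestamp behaviour has to switch the feature on explicitly before exercising it.

    # Hedged sketch: enable the ctime feature on a volume and verify it.
    gluster volume set myvol ctime on
    gluster volume get myvol ctime    # should now list the ctime option as "on"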
+ +Label: DOWNSTREAM ONLY + +BUG: 1844359 +Change-Id: I91e80b3d8a56fc089aeb58b0254812111d394842 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/202874 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/ctime/ctime-utimesat.t | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t +index 540e57a..da12fbe 100644 +--- a/tests/basic/ctime/ctime-utimesat.t ++++ b/tests/basic/ctime/ctime-utimesat.t +@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.read-after-open off + TEST $CLI volume set $V0 performance.open-behind off + TEST $CLI volume set $V0 performance.write-behind off + TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 ctime on + + TEST $CLI volume start $V0 + +-- +1.8.3.1 + diff --git a/SOURCES/0451-extras-Modify-group-virt-to-include-network-related-.patch b/SOURCES/0451-extras-Modify-group-virt-to-include-network-related-.patch new file mode 100644 index 0000000..bba69e1 --- /dev/null +++ b/SOURCES/0451-extras-Modify-group-virt-to-include-network-related-.patch @@ -0,0 +1,44 @@ +From 96d9b659fd0367abe1666a5ac6203208e0dc056d Mon Sep 17 00:00:00 2001 +From: Krutika Dhananjay <kdhananj@redhat.com> +Date: Mon, 4 May 2020 14:30:57 +0530 +Subject: [PATCH 451/451] extras: Modify group 'virt' to include + network-related options + +This is needed to work around an issue seen where vms running on +online hosts are getting killed when a different host is rebooted +in ovirt-gluster hyperconverged environments. Actual RCA is quite +lengthy and documented in the github issue. Please refer to it +for more details. + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24400 +> Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +> Fixes: #1217 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +Change-Id: Ic25b5f50144ad42458e5c847e1e7e191032396c1 +BUG: 1845064 +Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/203291 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/group-virt.example | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/extras/group-virt.example b/extras/group-virt.example +index c2ce89d..3a441eb 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -16,3 +16,8 @@ cluster.choose-local=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++network.ping-timeout=20 ++server.tcp-user-timeout=20 ++server.keepalive-time=10 ++server.keepalive-interval=2 ++server.keepalive-count=5 +-- +1.8.3.1 + diff --git a/SOURCES/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch b/SOURCES/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch new file mode 100644 index 0000000..0b115bb --- /dev/null +++ b/SOURCES/0452-Tier-DHT-Handle-the-pause-case-missed-out.patch @@ -0,0 +1,48 @@ +From c184943bdf38de5b4cbf165fd1cd98ce7bd9e976 Mon Sep 17 00:00:00 2001 +From: hari gowtham <hgowtham@redhat.com> +Date: Tue, 16 Jun 2020 14:47:53 +0530 +Subject: [PATCH 452/456] Tier/DHT: Handle the pause case missed out + +Problem: While backporting a change from master +the changes related to tier were removed. This started affecting +the tier pause functionality. 
Backporting it +to downstream left this use case broken, as we still support tier. +patch that caused this: https://code.engineering.redhat.com/gerrit/#/c/202647/2 + +Fix: add the condition back for tier pause to work. + +Label: DOWNSTREAM ONLY + +BUG: 1844359 +Change-Id: I46c6c179b09c7e1a729be9fd257fa4a490f0287e +Signed-off-by: hari gowtham <hgowtham@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/203560 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-rebalance.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index e9974cd..abc10fc 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1160,6 +1160,15 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + ++ if ((defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) && ++ (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING)) { ++ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, ++ "Migrate file paused"); ++ ret = -1; ++ break; ++ } ++ ++ + offset += ret; + total += ret; + +-- +1.8.3.1 + diff --git a/SOURCES/0453-glusterd-add-brick-command-failure.patch b/SOURCES/0453-glusterd-add-brick-command-failure.patch new file mode 100644 index 0000000..dd21350 --- /dev/null +++ b/SOURCES/0453-glusterd-add-brick-command-failure.patch @@ -0,0 +1,300 @@ +From a04592cce9aaa6ccb8a038bc3b4e31bc125d1d10 Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Tue, 16 Jun 2020 18:03:21 +0530 +Subject: [PATCH 453/456] glusterd: add-brick command failure + +Problem: add-brick operation is failing when replica or disperse +count is not mentioned in the add-brick command. + +Reason: with commit a113d93 we are checking brick order while +doing add-brick operation for replica and disperse volumes. If +replica count or disperse count is not mentioned in the command, +the dict get is failing, resulting in add-brick operation failure. + +> upstream patch: https://review.gluster.org/#/c/glusterfs/+/24581/ +> fixes: #1306 +> Change-Id: Ie957540e303bfb5f2d69015661a60d7e72557353 +> Signed-off-by: Sanju Rakonde <srakonde@redhat.com> + +BUG: 1847081 +Change-Id: Ie957540e303bfb5f2d69015661a60d7e72557353 +Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/203867 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/brick-order-check-add-brick.t | 40 ++++++++++++++++++++++ + tests/cluster.rc | 11 ++++-- + xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 39 ++++++++++++++------- + xlators/mgmt/glusterd/src/glusterd-utils.c | 30 ++--------------- + xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 41 +++++++++++++++++++---- + 6 files changed, 115 insertions(+), 49 deletions(-) + create mode 100644 tests/bugs/glusterd/brick-order-check-add-brick.t + +diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t +new file mode 100644 +index 0000000..29f0ed1 +--- /dev/null ++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t +@@ -0,0 +1,40 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../cluster.rc ++. 
$(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++TEST verify_lvm_version; ++#Create cluster with 3 nodes ++TEST launch_cluster 3 -NO_DEBUG -NO_FORCE ++TEST setup_lvm 3 ++ ++TEST $CLI_1 peer probe $H2 ++TEST $CLI_1 peer probe $H3 ++EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count ++ ++TEST $CLI_1 volume create $V0 replica 3 $H1:$L1/$V0 $H2:$L2/$V0 $H3:$L3/$V0 ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++EXPECT 'Created' volinfo_field $V0 'Status' ++ ++TEST $CLI_1 volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++#add-brick with or without mentioning the replica count should not fail ++TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}_1 $H2:$L2/${V0}_1 $H3:$L3/${V0}_1 ++EXPECT '2 x 3 = 6' volinfo_field $V0 'Number of Bricks' ++ ++TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_2 $H2:$L2/${V0}_2 $H3:$L3/${V0}_2 ++EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' ++ ++#adding bricks from same host should fail the brick order check ++TEST ! $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 ++EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' ++ ++#adding bricks from same host with force should succeed ++TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force ++EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks' ++ ++cleanup +diff --git a/tests/cluster.rc b/tests/cluster.rc +index 99be8e7..8b73153 100644 +--- a/tests/cluster.rc ++++ b/tests/cluster.rc +@@ -11,7 +11,7 @@ function launch_cluster() { + define_backends $count; + define_hosts $count; + define_glusterds $count $2; +- define_clis $count; ++ define_clis $count $3; + + start_glusterds; + } +@@ -133,8 +133,13 @@ function define_clis() { + lopt1="--log-file=$logdir/$logfile1" + + +- eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'"; +- eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'"; ++ if [ "$2" == "-NO_FORCE" ]; then ++ eval "CLI_$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt'"; ++ eval "CLI$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'"; ++ else ++ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'"; ++ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'"; ++ fi + done + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +index 121346c..5ae577a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +@@ -1576,20 +1576,35 @@ glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict) + + /* Check brick order if the volume type is replicate or disperse. If + * force at the end of command not given then check brick order. ++ * doing this check at the originator node is sufficient. + */ + +- if (!is_force) { +- if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || +- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = glusterd_check_brick_order(dict, msg, volinfo->type); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, +- "Not adding brick because of " +- "bad brick order. %s", +- msg); +- *op_errstr = gf_strdup(msg); +- goto out; +- } ++ if (is_origin_glusterd(dict) && !is_force) { ++ ret = 0; ++ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { ++ gf_msg_debug(this->name, 0, ++ "Replicate cluster type " ++ "found. 
Checking brick order."); ++ if (replica_count) ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type, ++ replica_count); ++ else ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type, ++ volinfo->replica_count); ++ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { ++ gf_msg_debug(this->name, 0, ++ "Disperse cluster type" ++ " found. Checking brick order."); ++ ret = glusterd_check_brick_order(dict, msg, volinfo->type, ++ volinfo->disperse_count); ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not adding brick because of " ++ "bad brick order. %s", ++ msg); ++ *op_errstr = gf_strdup(msg); ++ goto out; + } + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 6f904ae..545e688 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14802,7 +14802,8 @@ glusterd_compare_addrinfo(struct addrinfo *first, struct addrinfo *next) + * volume are present on the same server + */ + int32_t +-glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, ++ int32_t sub_count) + { + int ret = -1; + int i = 0; +@@ -14819,7 +14820,6 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) + char *tmpptr = NULL; + char *volname = NULL; + int32_t brick_count = 0; +- int32_t sub_count = 0; + struct addrinfo *ai_info = NULL; + char brick_addr[128] = { + 0, +@@ -14870,31 +14870,6 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type) + goto out; + } + +- if (type != GF_CLUSTER_TYPE_DISPERSE) { +- ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve replica count"); +- goto out; +- } +- gf_msg_debug(this->name, 0, +- "Replicate cluster type " +- "found. Checking brick order."); +- } else { +- ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"), +- &sub_count); +- if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +- "Bricks check : Could" +- " not retrieve disperse count"); +- goto out; +- } +- gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DISPERSE_CLUSTER_FOUND, +- "Disperse cluster type" +- " found. 
Checking brick order."); +- } + brick_list_dup = brick_list_ptr = gf_strdup(brick_list); + /* Resolve hostnames and get addrinfo */ + while (i < brick_count) { +@@ -14989,5 +14964,6 @@ out: + ai_list_tmp2 = ai_list_tmp1; + } + free(ai_list_tmp2); ++ gf_msg_debug("glusterd", 0, "Returning %d", ret); + return ret; + } +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index e2e2454..5f5de82 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -883,6 +883,7 @@ char * + search_brick_path_from_proc(pid_t brick_pid, char *brickpath); + + int32_t +-glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type); ++glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, ++ int32_t sub_count); + + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 8da2ff3..134b04c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -1024,6 +1024,8 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + int32_t local_brick_count = 0; + int32_t i = 0; + int32_t type = 0; ++ int32_t replica_count = 0; ++ int32_t disperse_count = 0; + char *brick = NULL; + char *tmpptr = NULL; + xlator_t *this = NULL; +@@ -1119,15 +1121,42 @@ glusterd_op_stage_create_volume(dict_t *dict, char **op_errstr, + } + + if (!is_force) { +- if ((type == GF_CLUSTER_TYPE_REPLICATE) || +- (type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = glusterd_check_brick_order(dict, msg, type); ++ if (type == GF_CLUSTER_TYPE_REPLICATE) { ++ ret = dict_get_int32n(dict, "replica-count", ++ SLEN("replica-count"), &replica_count); + if (ret) { +- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, +- "Not creating volume because of " +- "bad brick order"); ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve replica count"); ++ goto out; ++ } ++ gf_msg_debug(this->name, 0, ++ "Replicate cluster type " ++ "found. Checking brick order."); ++ ret = glusterd_check_brick_order(dict, msg, type, ++ replica_count); ++ } else if (type == GF_CLUSTER_TYPE_DISPERSE) { ++ ret = dict_get_int32n(dict, "disperse-count", ++ SLEN("disperse-count"), &disperse_count); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, ++ "Bricks check : Could" ++ " not retrieve disperse count"); + goto out; + } ++ gf_msg_debug(this->name, 0, ++ "Disperse cluster type" ++ " found. Checking brick order."); ++ ret = glusterd_check_brick_order(dict, msg, type, ++ disperse_count); ++ } ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER, ++ "Not creating the volume because of " ++ "bad brick order. 
%s", ++ msg); ++ *op_errstr = gf_strdup(msg); ++ goto out; + } + } + } +-- +1.8.3.1 + diff --git a/SOURCES/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch b/SOURCES/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch new file mode 100644 index 0000000..6ad460d --- /dev/null +++ b/SOURCES/0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch @@ -0,0 +1,152 @@ +From cddd253c5e3f0a7c3b91c35cea8ad1921cb43b98 Mon Sep 17 00:00:00 2001 +From: Kinglong Mee <kinglongmee@gmail.com> +Date: Thu, 18 Jul 2019 11:43:01 +0800 +Subject: [PATCH 454/456] features/locks: avoid use after freed of frame for + blocked lock + +The fop contains blocked lock may use freed frame info when other +unlock fop has unwind the blocked lock. + +Because the blocked lock is added to block list in inode lock(or +other lock), after that, when out of the inode lock, the fop +contains the blocked lock should not use it. + +Upstream Patch - https://review.gluster.org/#/c/glusterfs/+/23155/ + +>Change-Id: Icb309a1cc78380dc982b26d50c18d67e4f2c8915 +>fixes: bz#1737291 +>Signed-off-by: Kinglong Mee <mijinlong@horiscale.com> + +Change-Id: Icb309a1cc78380dc982b26d50c18d67e4f2c8915 +BUG: 1812789 +Reviewed-on: https://code.engineering.redhat.com/gerrit/206465 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + xlators/features/locks/src/common.c | 4 ++++ + xlators/features/locks/src/entrylk.c | 4 ++-- + xlators/features/locks/src/inodelk.c | 7 +++++-- + xlators/features/locks/src/posix.c | 5 +++-- + xlators/features/locks/src/reservelk.c | 2 -- + 5 files changed, 14 insertions(+), 8 deletions(-) + +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index 6e7fb4b..1406e70 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -1080,6 +1080,10 @@ pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock, + lock->fl_type == F_UNLCK ? 
"Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); ++ ++ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, ++ &lock->user_flock, NULL); ++ + lock->blocked = 1; + __insert_lock(pl_inode, lock); + ret = -1; +diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c +index ced5eca..93c649c 100644 +--- a/xlators/features/locks/src/entrylk.c ++++ b/xlators/features/locks/src/entrylk.c +@@ -552,6 +552,8 @@ __lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom, + gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}", + pinode, lock->basename); + ++ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename, ++ ENTRYLK_LOCK, lock->type); + out: + return -EAGAIN; + } +@@ -932,8 +934,6 @@ out: + op_ret, op_errno); + unwind: + STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL); +- } else { +- entrylk_trace_block(this, frame, volume, fd, loc, basename, cmd, type); + } + + if (pcontend != NULL) { +diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c +index a9c42f1..24dee49 100644 +--- a/xlators/features/locks/src/inodelk.c ++++ b/xlators/features/locks/src/inodelk.c +@@ -420,6 +420,8 @@ __lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + lkowner_utoa(&lock->owner), lock->user_flock.l_start, + lock->user_flock.l_len); + ++ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, ++ lock->volume); + out: + return -EAGAIN; + } +@@ -959,6 +961,7 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + int ret = -1; + GF_UNUSED int dict_ret = -1; + int can_block = 0; ++ short lock_type = 0; + pl_inode_t *pinode = NULL; + pl_inode_lock_t *reqlock = NULL; + pl_dom_list_t *dom = NULL; +@@ -1024,13 +1027,13 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + /* fall through */ + + case F_SETLK: ++ lock_type = flock->l_type; + memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock)); + ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom, + inode); + + if (ret < 0) { +- if ((can_block) && (F_UNLCK != flock->l_type)) { +- pl_trace_block(this, frame, fd, loc, cmd, flock, volume); ++ if ((can_block) && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 50f1265..7887b82 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -2557,6 +2557,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + uint32_t lk_flags = 0; + posix_locks_private_t *priv = this->private; + pl_local_t *local = NULL; ++ short lock_type = 0; + + int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags); + if (ret == 0) { +@@ -2701,6 +2702,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + case F_SETLK: + reqlock->frame = frame; + reqlock->this = this; ++ lock_type = flock->l_type; + + pthread_mutex_lock(&pl_inode->mutex); + { +@@ -2738,8 +2740,7 @@ pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + + ret = pl_setlk(this, pl_inode, reqlock, can_block); + if (ret == -1) { +- if ((can_block) && (F_UNLCK != flock->l_type)) { +- pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL); ++ if ((can_block) && (F_UNLCK != lock_type)) { + goto out; + } + gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); +diff 
--git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c +index 51076d7..604691f 100644 +--- a/xlators/features/locks/src/reservelk.c ++++ b/xlators/features/locks/src/reservelk.c +@@ -312,8 +312,6 @@ grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode) + ret = pl_setlk(this, pl_inode, lock, can_block); + if (ret == -1) { + if (can_block) { +- pl_trace_block(this, lock->frame, fd, NULL, cmd, +- &lock->user_flock, NULL); + continue; + } else { + gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN"); +-- +1.8.3.1 + diff --git a/SOURCES/0455-locks-prevent-deletion-of-locked-entries.patch b/SOURCES/0455-locks-prevent-deletion-of-locked-entries.patch new file mode 100644 index 0000000..5960690 --- /dev/null +++ b/SOURCES/0455-locks-prevent-deletion-of-locked-entries.patch @@ -0,0 +1,1253 @@ +From 3f6ff474db3934f43d9963dfe4dda7d201211e75 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 12 Jun 2020 00:06:36 +0200 +Subject: [PATCH 455/456] locks: prevent deletion of locked entries + +To keep consistency inside transactions started by locking an entry or +an inode, this change delays the removal of entries that are currently +locked by one or more clients. Once all locks are released, the removal +is processed. + +The detection of stale inodes in the locking code of EC has also been +improved. + +>Upstream patch - https://review.gluster.org/#/c/glusterfs/+/20025/ +>Fixes: #990 + +Change-Id: Ic8ba23d9480f80c7f74e7a310bf8a15922320fd5 +BUG: 1812789 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/206442 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + xlators/cluster/ec/src/ec-locks.c | 69 ++++++-- + xlators/features/locks/src/common.c | 316 ++++++++++++++++++++++++++++++++++- + xlators/features/locks/src/common.h | 43 +++++ + xlators/features/locks/src/entrylk.c | 19 +-- + xlators/features/locks/src/inodelk.c | 150 ++++++++++------- + xlators/features/locks/src/locks.h | 23 ++- + xlators/features/locks/src/posix.c | 183 ++++++++++++++++++-- + 7 files changed, 689 insertions(+), 114 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c +index ffcac07..db86296 100644 +--- a/xlators/cluster/ec/src/ec-locks.c ++++ b/xlators/cluster/ec/src/ec-locks.c +@@ -28,9 +28,36 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) + ec_t *ec = fop->xl->private; + ec_cbk_data_t *ans = NULL; + ec_cbk_data_t *cbk = NULL; +- uintptr_t locked = 0, notlocked = 0; ++ uintptr_t locked = 0; ++ int32_t good = 0; ++ int32_t eagain = 0; ++ int32_t estale = 0; + int32_t error = -1; + ++ /* There are some errors that we'll handle in an special way while trying ++ * to acquire a lock. ++ * ++ * EAGAIN: If it's found during a parallel non-blocking lock request, we ++ * consider that there's contention on the inode, so we consider ++ * the acquisition a failure and try again with a sequential ++ * blocking lock request. This will ensure that we get a lock on ++ * as many bricks as possible (ignoring EAGAIN here would cause ++ * unnecessary triggers of self-healing). ++ * ++ * If it's found during a sequential blocking lock request, it's ++ * considered an error. Lock will only succeed if there are ++ * enough other bricks locked. ++ * ++ * ESTALE: This can appear during parallel or sequential lock request if ++ * the inode has just been unlinked. We consider this error is ++ * not recoverable, but we also don't consider it as fatal. 
So, ++ * if it happens during parallel lock, we won't attempt a ++ * sequential one unless there are EAGAIN errors on other ++ * bricks (and are enough to form a quorum), but if we reach ++ * quorum counting the ESTALE bricks, we consider the whole ++ * result of the operation is ESTALE instead of EIO. ++ */ ++ + list_for_each_entry(ans, &fop->cbk_list, list) + { + if (ans->op_ret >= 0) { +@@ -38,24 +65,23 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) + error = EIO; + } + locked |= ans->mask; ++ good = ans->count; + cbk = ans; +- } else { +- if (ans->op_errno == EAGAIN) { +- switch (fop->uint32) { +- case EC_LOCK_MODE_NONE: +- case EC_LOCK_MODE_ALL: +- /* Goal is to treat non-blocking lock as failure +- * even if there is a single EAGAIN*/ +- notlocked |= ans->mask; +- break; +- } +- } ++ } else if (ans->op_errno == ESTALE) { ++ estale += ans->count; ++ } else if ((ans->op_errno == EAGAIN) && ++ (fop->uint32 != EC_LOCK_MODE_INC)) { ++ eagain += ans->count; + } + } + + if (error == -1) { +- if (gf_bits_count(locked | notlocked) >= ec->fragments) { +- if (notlocked == 0) { ++ /* If we have enough quorum with succeeded and EAGAIN answers, we ++ * ignore for now any ESTALE answer. If there are EAGAIN answers, ++ * we retry with a sequential blocking lock request if needed. ++ * Otherwise we succeed. */ ++ if ((good + eagain) >= ec->fragments) { ++ if (eagain == 0) { + if (fop->answer == NULL) { + fop->answer = cbk; + } +@@ -68,21 +94,28 @@ ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask) + case EC_LOCK_MODE_NONE: + error = EAGAIN; + break; +- + case EC_LOCK_MODE_ALL: + fop->uint32 = EC_LOCK_MODE_INC; + break; +- + default: ++ /* This shouldn't happen because eagain cannot be > 0 ++ * when fop->uint32 is EC_LOCK_MODE_INC. */ + error = EIO; + break; + } + } + } else { +- if (fop->answer && fop->answer->op_ret < 0) ++ /* We have been unable to find enough candidates that will be able ++ * to take the lock. If we have quorum on some answer, we return ++ * it. Otherwise we check if ESTALE answers allow us to reach ++ * quorum. If so, we return ESTALE. */ ++ if (fop->answer && fop->answer->op_ret < 0) { + error = fop->answer->op_errno; +- else ++ } else if ((good + eagain + estale) >= ec->fragments) { ++ error = ESTALE; ++ } else { + error = EIO; ++ } + } + } + +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index 1406e70..0c52853 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -462,11 +462,16 @@ pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local) + INIT_LIST_HEAD(&pl_inode->blocked_calls); + INIT_LIST_HEAD(&pl_inode->metalk_list); + INIT_LIST_HEAD(&pl_inode->queued_locks); ++ INIT_LIST_HEAD(&pl_inode->waiting); + gf_uuid_copy(pl_inode->gfid, inode->gfid); + + pl_inode->check_mlock_info = _gf_true; + pl_inode->mlock_enforced = _gf_false; + ++ /* -2 means never looked up. -1 means something went wrong and link ++ * tracking is disabled. 
*/ ++ pl_inode->links = -2; ++ + ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode)); + if (ret) { + pthread_mutex_destroy(&pl_inode->mutex); +@@ -1276,4 +1281,313 @@ pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) + } + + return 0; +-} +\ No newline at end of file ++} ++ ++gf_boolean_t ++pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client) ++{ ++ if (client && (client->opversion < GD_OP_VERSION_7_0)) { ++ return _gf_true; ++ } ++ ++ if (is_lk_owner_null(owner)) { ++ return _gf_false; ++ } ++ return _gf_true; ++} ++ ++static int32_t ++pl_inode_from_loc(loc_t *loc, inode_t **pinode) ++{ ++ inode_t *inode = NULL; ++ int32_t error = 0; ++ ++ if (loc->inode != NULL) { ++ inode = inode_ref(loc->inode); ++ goto done; ++ } ++ ++ if (loc->parent == NULL) { ++ error = EINVAL; ++ goto done; ++ } ++ ++ if (!gf_uuid_is_null(loc->gfid)) { ++ inode = inode_find(loc->parent->table, loc->gfid); ++ if (inode != NULL) { ++ goto done; ++ } ++ } ++ ++ if (loc->name == NULL) { ++ error = EINVAL; ++ goto done; ++ } ++ ++ inode = inode_grep(loc->parent->table, loc->parent, loc->name); ++ if (inode == NULL) { ++ /* We haven't found any inode. This means that the file doesn't exist ++ * or that even if it exists, we don't have any knowledge about it, so ++ * we don't have locks on it either, which is fine for our purposes. */ ++ goto done; ++ } ++ ++done: ++ *pinode = inode; ++ ++ return error; ++} ++ ++static gf_boolean_t ++pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode, ++ struct timespec *now, struct list_head *contend) ++{ ++ pl_dom_list_t *dom; ++ pl_inode_lock_t *lock; ++ gf_boolean_t has_owners = _gf_false; ++ ++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list) ++ { ++ list_for_each_entry(lock, &dom->inodelk_list, list) ++ { ++ /* If the lock belongs to the same client, we assume it's related ++ * to the same operation, so we allow the removal to continue. */ ++ if (lock->client == client) { ++ continue; ++ } ++ /* If the lock belongs to an internal process, we don't block the ++ * removal. */ ++ if (lock->client_pid < 0) { ++ continue; ++ } ++ if (contend == NULL) { ++ return _gf_true; ++ } ++ has_owners = _gf_true; ++ inodelk_contention_notify_check(xl, lock, now, contend); ++ } ++ } ++ ++ return has_owners; ++} ++ ++int32_t ++pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, ++ pl_inode_t **ppl_inode, struct list_head *contend) ++{ ++ struct timespec now; ++ inode_t *inode; ++ pl_inode_t *pl_inode; ++ int32_t error; ++ ++ pl_inode = NULL; ++ ++ error = pl_inode_from_loc(loc, &inode); ++ if ((error != 0) || (inode == NULL)) { ++ goto done; ++ } ++ ++ pl_inode = pl_inode_get(xl, inode, NULL); ++ if (pl_inode == NULL) { ++ inode_unref(inode); ++ error = ENOMEM; ++ goto done; ++ } ++ ++ /* pl_inode_from_loc() already increments ref count for inode, so ++ * we only assign here our reference. */ ++ pl_inode->inode = inode; ++ ++ timespec_now(&now); ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ if (pl_inode->removed) { ++ error = ESTALE; ++ goto unlock; ++ } ++ ++ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) { ++ error = -1; ++ /* We skip the unlock here because the caller must create a stub when ++ * we return -1 and do a call to pl_inode_remove_complete(), which ++ * assumes the lock is still acquired and will release it once ++ * everything else is prepared. 
*/ ++ goto done; ++ } ++ ++ pl_inode->is_locked = _gf_true; ++ pl_inode->remove_running++; ++ ++unlock: ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++done: ++ *ppl_inode = pl_inode; ++ ++ return error; ++} ++ ++int32_t ++pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, ++ struct list_head *contend) ++{ ++ pl_inode_lock_t *lock; ++ int32_t error = -1; ++ ++ if (stub != NULL) { ++ list_add_tail(&stub->list, &pl_inode->waiting); ++ pl_inode->is_locked = _gf_true; ++ } else { ++ error = ENOMEM; ++ ++ while (!list_empty(contend)) { ++ lock = list_first_entry(contend, pl_inode_lock_t, list); ++ list_del_init(&lock->list); ++ __pl_inodelk_unref(lock); ++ } ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++ if (error < 0) { ++ inodelk_contention_notify(xl, contend); ++ } ++ ++ inode_unref(pl_inode->inode); ++ ++ return error; ++} ++ ++void ++pl_inode_remove_wake(struct list_head *list) ++{ ++ call_stub_t *stub; ++ ++ while (!list_empty(list)) { ++ stub = list_first_entry(list, call_stub_t, list); ++ list_del_init(&stub->list); ++ ++ call_resume(stub); ++ } ++} ++ ++void ++pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error) ++{ ++ struct list_head contend, granted; ++ struct timespec now; ++ pl_dom_list_t *dom; ++ ++ if (pl_inode == NULL) { ++ return; ++ } ++ ++ INIT_LIST_HEAD(&contend); ++ INIT_LIST_HEAD(&granted); ++ timespec_now(&now); ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ if (error == 0) { ++ if (pl_inode->links >= 0) { ++ pl_inode->links--; ++ } ++ if (pl_inode->links == 0) { ++ pl_inode->removed = _gf_true; ++ } ++ } ++ ++ pl_inode->remove_running--; ++ ++ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) { ++ pl_inode->is_locked = _gf_false; ++ ++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list) ++ { ++ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now, ++ &contend); ++ } ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++ unwind_granted_inodes(xl, pl_inode, &granted); ++ ++ inodelk_contention_notify(xl, &contend); ++ ++ inode_unref(pl_inode->inode); ++} ++ ++void ++pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, ++ struct list_head *list) ++{ ++ call_stub_t *stub, *tmp; ++ ++ if (!pl_inode->is_locked) { ++ return; ++ } ++ ++ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list) ++ { ++ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL, ++ NULL)) { ++ list_move_tail(&stub->list, list); ++ } ++ } ++} ++ ++/* This function determines if an inodelk attempt can be done now or it needs ++ * to wait. ++ * ++ * Possible return values: ++ * < 0: An error occurred. Currently only -ESTALE can be returned if the ++ * inode has been deleted previously by unlink/rmdir/rename ++ * = 0: The lock can be attempted. ++ * > 0: The lock needs to wait because a conflicting remove operation is ++ * ongoing. ++ */ ++int32_t ++pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock) ++{ ++ pl_dom_list_t *dom; ++ pl_inode_lock_t *ilock; ++ ++ /* If the inode has been deleted, we won't allow any lock. */ ++ if (pl_inode->removed) { ++ return -ESTALE; ++ } ++ ++ /* We only synchronize with locks made for regular operations coming from ++ * the user. Locks done for internal purposes are hard to control and could ++ * lead to long delays or deadlocks quite easily. 
*/ ++ if (lock->client_pid < 0) { ++ return 0; ++ } ++ if (!pl_inode->is_locked) { ++ return 0; ++ } ++ if (pl_inode->remove_running > 0) { ++ return 1; ++ } ++ ++ list_for_each_entry(dom, &pl_inode->dom_list, inode_list) ++ { ++ list_for_each_entry(ilock, &dom->inodelk_list, list) ++ { ++ /* If a lock from the same client is already granted, we allow this ++ * one to continue. This is necessary to prevent deadlocks when ++ * multiple locks are taken for the same operation. ++ * ++ * On the other side it's unlikely that the same client sends ++ * completely unrelated locks for the same inode. ++ */ ++ if (ilock->client == lock->client) { ++ return 0; ++ } ++ } ++ } ++ ++ return 1; ++} +diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h +index ea86b96..6c81ac3 100644 +--- a/xlators/features/locks/src/common.h ++++ b/xlators/features/locks/src/common.h +@@ -105,6 +105,15 @@ void + __pl_inodelk_unref(pl_inode_lock_t *lock); + + void ++__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, ++ struct list_head *granted, pl_dom_list_t *dom, ++ struct timespec *now, struct list_head *contend); ++ ++void ++unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, ++ struct list_head *granted); ++ ++void + grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + pl_dom_list_t *dom, struct timespec *now, + struct list_head *contend); +@@ -204,6 +213,16 @@ pl_metalock_is_active(pl_inode_t *pl_inode); + void + __pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock); + ++void ++inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock, ++ struct timespec *now, ++ struct list_head *contend); ++ ++void ++entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock, ++ struct timespec *now, ++ struct list_head *contend); ++ + gf_boolean_t + pl_does_monkey_want_stuck_lock(); + +@@ -216,4 +235,28 @@ pl_clean_local(pl_local_t *local); + int + pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd); + ++gf_boolean_t ++pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client); ++ ++int32_t ++pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc, ++ pl_inode_t **ppl_inode, struct list_head *contend); ++ ++int32_t ++pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub, ++ struct list_head *contend); ++ ++void ++pl_inode_remove_wake(struct list_head *list); ++ ++void ++pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error); ++ ++void ++pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode, ++ struct list_head *list); ++ ++int32_t ++pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock); ++ + #endif /* __COMMON_H__ */ +diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c +index 93c649c..b97836f 100644 +--- a/xlators/features/locks/src/entrylk.c ++++ b/xlators/features/locks/src/entrylk.c +@@ -197,9 +197,9 @@ out: + return revoke_lock; + } + +-static gf_boolean_t +-__entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, +- struct timespec *now) ++void ++entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock, ++ struct timespec *now, struct list_head *contend) + { + posix_locks_private_t *priv; + int64_t elapsed; +@@ -209,7 +209,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. 
*/ + if (!list_empty(&lock->contend)) { +- return _gf_false; ++ return; + } + + elapsed = now->tv_sec; +@@ -218,7 +218,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { +- return _gf_false; ++ return; + } + + /* All contention notifications will be sent outside of the locked +@@ -231,7 +231,7 @@ __entrylk_needs_contention_notify(xlator_t *this, pl_entry_lock_t *lock, + + lock->contention_time = *now; + +- return _gf_true; ++ list_add_tail(&lock->contend, contend); + } + + void +@@ -325,9 +325,7 @@ __entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock, + break; + } + } +- if (__entrylk_needs_contention_notify(this, tmp, now)) { +- list_add_tail(&tmp->contend, contend); +- } ++ entrylk_contention_notify_check(this, tmp, now, contend); + } + } + +@@ -690,10 +688,9 @@ __grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode, + bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend); + + if (bl_ret == 0) { +- list_add(&bl->blocked_locks, granted); ++ list_add_tail(&bl->blocked_locks, granted); + } + } +- return; + } + + /* Grants locks if possible which are blocked on a lock */ +diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c +index 24dee49..1a07243 100644 +--- a/xlators/features/locks/src/inodelk.c ++++ b/xlators/features/locks/src/inodelk.c +@@ -231,9 +231,9 @@ out: + return revoke_lock; + } + +-static gf_boolean_t +-__inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, +- struct timespec *now) ++void ++inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock, ++ struct timespec *now, struct list_head *contend) + { + posix_locks_private_t *priv; + int64_t elapsed; +@@ -243,7 +243,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, + /* If this lock is in a list, it means that we are about to send a + * notification for it, so no need to do anything else. 
*/ + if (!list_empty(&lock->contend)) { +- return _gf_false; ++ return; + } + + elapsed = now->tv_sec; +@@ -252,7 +252,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, + elapsed--; + } + if (elapsed < priv->notify_contention_delay) { +- return _gf_false; ++ return; + } + + /* All contention notifications will be sent outside of the locked +@@ -265,7 +265,7 @@ __inodelk_needs_contention_notify(xlator_t *this, pl_inode_lock_t *lock, + + lock->contention_time = *now; + +- return _gf_true; ++ list_add_tail(&lock->contend, contend); + } + + void +@@ -353,9 +353,7 @@ __inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock, + break; + } + } +- if (__inodelk_needs_contention_notify(this, l, now)) { +- list_add_tail(&l->contend, contend); +- } ++ inodelk_contention_notify_check(this, l, now, contend); + } + } + +@@ -435,12 +433,17 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + struct list_head *contend) + { + pl_inode_lock_t *conf = NULL; +- int ret = -EINVAL; ++ int ret; + +- conf = __inodelk_grantable(this, dom, lock, now, contend); +- if (conf) { +- ret = __lock_blocked_add(this, dom, lock, can_block); +- goto out; ++ ret = pl_inode_remove_inodelk(pl_inode, lock); ++ if (ret < 0) { ++ return ret; ++ } ++ if (ret == 0) { ++ conf = __inodelk_grantable(this, dom, lock, now, contend); ++ } ++ if ((ret > 0) || (conf != NULL)) { ++ return __lock_blocked_add(this, dom, lock, can_block); + } + + /* To prevent blocked locks starvation, check if there are any blocked +@@ -462,17 +465,13 @@ __lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, + "starvation"); + } + +- ret = __lock_blocked_add(this, dom, lock, can_block); +- goto out; ++ return __lock_blocked_add(this, dom, lock, can_block); + } + __pl_inodelk_ref(lock); + gettimeofday(&lock->granted_time, NULL); + list_add(&lock->list, &dom->inodelk_list); + +- ret = 0; +- +-out: +- return ret; ++ return 0; + } + + /* Return true if the two inodelks have exactly same lock boundaries */ +@@ -529,12 +528,11 @@ out: + return conf; + } + +-static void ++void + __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + struct list_head *granted, pl_dom_list_t *dom, + struct timespec *now, struct list_head *contend) + { +- int bl_ret = 0; + pl_inode_lock_t *bl = NULL; + pl_inode_lock_t *tmp = NULL; + +@@ -547,52 +545,48 @@ __grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + { + list_del_init(&bl->blocked_locks); + +- bl_ret = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); ++ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend); + +- if (bl_ret == 0) { +- list_add(&bl->blocked_locks, granted); ++ if (bl->status != -EAGAIN) { ++ list_add_tail(&bl->blocked_locks, granted); + } + } +- return; + } + +-/* Grant all inodelks blocked on a lock */ + void +-grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, +- pl_dom_list_t *dom, struct timespec *now, +- struct list_head *contend) ++unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode, ++ struct list_head *granted) + { +- struct list_head granted; + pl_inode_lock_t *lock; + pl_inode_lock_t *tmp; ++ int32_t op_ret; ++ int32_t op_errno; + +- INIT_LIST_HEAD(&granted); +- +- pthread_mutex_lock(&pl_inode->mutex); +- { +- __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, +- contend); +- } +- pthread_mutex_unlock(&pl_inode->mutex); +- +- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) ++ list_for_each_entry_safe(lock, tmp, 
granted, blocked_locks) + { +- gf_log(this->name, GF_LOG_TRACE, +- "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted", +- lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid, +- lkowner_utoa(&lock->owner), lock->user_flock.l_start, +- lock->user_flock.l_len); +- ++ if (lock->status == 0) { ++ op_ret = 0; ++ op_errno = 0; ++ gf_log(this->name, GF_LOG_TRACE, ++ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 ++ " => Granted", ++ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", ++ lock->client_pid, lkowner_utoa(&lock->owner), ++ lock->user_flock.l_start, lock->user_flock.l_len); ++ } else { ++ op_ret = -1; ++ op_errno = -lock->status; ++ } + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, +- 0, 0, lock->volume); ++ op_ret, op_errno, lock->volume); + +- STACK_UNWIND_STRICT(inodelk, lock->frame, 0, 0, NULL); ++ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL); + lock->frame = NULL; + } + + pthread_mutex_lock(&pl_inode->mutex); + { +- list_for_each_entry_safe(lock, tmp, &granted, blocked_locks) ++ list_for_each_entry_safe(lock, tmp, granted, blocked_locks) + { + list_del_init(&lock->blocked_locks); + __pl_inodelk_unref(lock); +@@ -601,6 +595,26 @@ grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, + pthread_mutex_unlock(&pl_inode->mutex); + } + ++/* Grant all inodelks blocked on a lock */ ++void ++grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode, ++ pl_dom_list_t *dom, struct timespec *now, ++ struct list_head *contend) ++{ ++ struct list_head granted; ++ ++ INIT_LIST_HEAD(&granted); ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ { ++ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now, ++ contend); ++ } ++ pthread_mutex_unlock(&pl_inode->mutex); ++ ++ unwind_granted_inodes(this, pl_inode, &granted); ++} ++ + static void + pl_inodelk_log_cleanup(pl_inode_lock_t *lock) + { +@@ -662,7 +676,7 @@ pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx) + * and blocked lists, then this means that a parallel + * unlock on another inodelk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in +- * __grant_blocked_node_locks() (although using the ++ * __grant_blocked_inode_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked inodelks from other clients, now that L1 is +@@ -747,6 +761,7 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + gf_boolean_t need_inode_unref = _gf_false; + struct list_head *pcontend = NULL; + struct list_head contend; ++ struct list_head wake; + struct timespec now = {}; + short fl_type; + +@@ -798,6 +813,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + timespec_now(&now); + } + ++ INIT_LIST_HEAD(&wake); ++ + if (ctx) + pthread_mutex_lock(&ctx->lock); + pthread_mutex_lock(&pl_inode->mutex); +@@ -820,18 +837,17 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + lock->fl_type == F_UNLCK ? "Unlock" : "Lock", + lock->client_pid, lkowner_utoa(&lock->owner), + lock->user_flock.l_start, lock->user_flock.l_len); +- if (can_block) ++ if (can_block) { + unref = _gf_false; +- /* For all but the case where a non-blocking +- * lock attempt fails, the extra ref taken at +- * the start of this function must be negated. 
+- */ +- else +- need_inode_unref = _gf_true; ++ } + } +- +- if (ctx && (!ret || can_block)) ++ /* For all but the case where a non-blocking lock attempt fails ++ * with -EAGAIN, the extra ref taken at the start of this function ++ * must be negated. */ ++ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block); ++ if (ctx && !need_inode_unref) { + list_add_tail(&lock->client_list, &ctx->inodelk_lockers); ++ } + } else { + /* Irrespective of whether unlock succeeds or not, + * the extra inode ref that was done at the start of +@@ -849,6 +865,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + list_del_init(&retlock->client_list); + __pl_inodelk_unref(retlock); + ++ pl_inode_remove_unlocked(this, pl_inode, &wake); ++ + ret = 0; + } + out: +@@ -859,6 +877,8 @@ pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + if (ctx) + pthread_mutex_unlock(&ctx->lock); + ++ pl_inode_remove_wake(&wake); ++ + /* The following (extra) unref corresponds to the ref that + * was done at the time the lock was granted. + */ +@@ -1033,10 +1053,14 @@ pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + inode); + + if (ret < 0) { +- if ((can_block) && (F_UNLCK != lock_type)) { +- goto out; ++ if (ret == -EAGAIN) { ++ if (can_block && (F_UNLCK != lock_type)) { ++ goto out; ++ } ++ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); ++ } else { ++ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret); + } +- gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN"); + op_errno = -ret; + goto unwind; + } +diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h +index aa267de..6666feb 100644 +--- a/xlators/features/locks/src/locks.h ++++ b/xlators/features/locks/src/locks.h +@@ -102,6 +102,9 @@ struct __pl_inode_lock { + + struct list_head client_list; /* list of all locks from a client */ + short fl_type; ++ ++ int32_t status; /* Error code when we try to grant a lock in blocked ++ state */ + }; + typedef struct __pl_inode_lock pl_inode_lock_t; + +@@ -164,13 +167,14 @@ struct __pl_inode { + struct list_head rw_list; /* list of waiting r/w requests */ + struct list_head reservelk_list; /* list of reservelks */ + struct list_head blocked_reservelks; /* list of blocked reservelks */ +- struct list_head +- blocked_calls; /* List of blocked lock calls while a reserve is held*/ +- struct list_head metalk_list; /* Meta lock list */ +- /* This is to store the incoming lock +- requests while meta lock is enabled */ +- struct list_head queued_locks; +- int mandatory; /* if mandatory locking is enabled */ ++ struct list_head blocked_calls; /* List of blocked lock calls while a ++ reserve is held*/ ++ struct list_head metalk_list; /* Meta lock list */ ++ struct list_head queued_locks; /* This is to store the incoming lock ++ requests while meta lock is enabled */ ++ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir ++ the inode. */ ++ int mandatory; /* if mandatory locking is enabled */ + + inode_t *refkeeper; /* hold refs on an inode while locks are + held to prevent pruning */ +@@ -197,6 +201,11 @@ struct __pl_inode { + */ + int fop_wind_count; + pthread_cond_t check_fop_wind_count; ++ ++ int32_t links; /* Number of hard links the inode has. */ ++ uint32_t remove_running; /* Number of remove operations running. */ ++ gf_boolean_t is_locked; /* Regular locks will be blocked. */ ++ gf_boolean_t removed; /* The inode has been deleted. 
*/ + }; + typedef struct __pl_inode pl_inode_t; + +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 7887b82..5ae0125 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -147,6 +147,29 @@ fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **); + } \ + } while (0) + ++#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \ ++ _args...) \ ++ ({ \ ++ struct list_head contend; \ ++ pl_inode_t *__pl_inode; \ ++ call_stub_t *__stub; \ ++ int32_t __error; \ ++ INIT_LIST_HEAD(&contend); \ ++ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \ ++ &__pl_inode, &contend); \ ++ if (__error < 0) { \ ++ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \ ++ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \ ++ &contend); \ ++ } else if (__error == 0) { \ ++ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \ ++ _loc2); \ ++ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \ ++ FIRST_CHILD(_xl)->fops->_fop, ##_args); \ ++ } \ ++ __error; \ ++ }) ++ + gf_boolean_t + pl_has_xdata_requests(dict_t *xdata) + { +@@ -2969,11 +2992,85 @@ out: + return ret; + } + ++static int32_t ++pl_request_link_count(dict_t **pxdata) ++{ ++ dict_t *xdata; ++ ++ xdata = *pxdata; ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ if (xdata == NULL) { ++ return ENOMEM; ++ } ++ } else { ++ dict_ref(xdata); ++ } ++ ++ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) { ++ dict_unref(xdata); ++ return ENOMEM; ++ } ++ ++ *pxdata = xdata; ++ ++ return 0; ++} ++ ++static int32_t ++pl_check_link_count(dict_t *xdata) ++{ ++ int32_t count; ++ ++ /* In case we are unable to read the link count from xdata, we take a ++ * conservative approach and return -2, which will prevent the inode from ++ * being considered deleted. In fact it will cause link tracking for this ++ * inode to be disabled completely to avoid races. */ ++ ++ if (xdata == NULL) { ++ return -2; ++ } ++ ++ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) { ++ return -2; ++ } ++ ++ return count; ++} ++ + int32_t + pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) + { ++ pl_inode_t *pl_inode; ++ ++ if (op_ret >= 0) { ++ pl_inode = pl_inode_get(this, inode, NULL); ++ if (pl_inode == NULL) { ++ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL, ++ NULL); ++ return 0; ++ } ++ ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ /* We only update the link count if we previously didn't know it. ++ * Doing it always can lead to races since lookup is not executed ++ * atomically most of the times. */ ++ if (pl_inode->links == -2) { ++ pl_inode->links = pl_check_link_count(xdata); ++ if (buf->ia_type == IA_IFDIR) { ++ /* Directories have at least 2 links. To avoid special handling ++ * for directories, we simply decrement the value here to make ++ * them equivalent to regular files. 
*/ ++ pl_inode->links--; ++ } ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ } ++ + PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; +@@ -2982,9 +3079,17 @@ pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t + pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->lookup, loc, xdata); ++ int32_t error; ++ ++ error = pl_request_link_count(&xdata); ++ if (error == 0) { ++ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); ++ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->lookup, loc, xdata); ++ dict_unref(xdata); ++ } else { ++ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL); ++ } + return 0; + } + +@@ -3792,6 +3897,10 @@ unlock: + gf_proc_dump_write("posixlk-count", "%d", count); + __dump_posixlks(pl_inode); + } ++ ++ gf_proc_dump_write("links", "%d", pl_inode->links); ++ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running); ++ gf_proc_dump_write("removed", "%u", pl_inode->removed); + } + pthread_mutex_unlock(&pl_inode->mutex); + +@@ -4137,8 +4246,11 @@ pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + struct iatt *postoldparent, struct iatt *prenewparent, + struct iatt *postnewparent, dict_t *xdata) + { ++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); ++ + PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); ++ + return 0; + } + +@@ -4146,10 +4258,15 @@ int32_t + pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); ++ int32_t error; ++ ++ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename, ++ pl_rename_cbk, oldloc, newloc, xdata); ++ if (error > 0) { ++ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL, ++ NULL, NULL); ++ } + +- STACK_WIND(frame, pl_rename_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + +@@ -4273,8 +4390,11 @@ pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) + { ++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0); ++ + PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent, + postparent, xdata); ++ + return 0; + } + +@@ -4282,9 +4402,14 @@ int32_t + pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_unlink_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); ++ int32_t error; ++ ++ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink, ++ pl_unlink_cbk, loc, xflag, xdata); ++ if (error > 0) { ++ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL); ++ } ++ + return 0; + } + +@@ -4351,8 +4476,11 @@ pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) + { ++ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? 
op_errno : 0); ++ + PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent, + postparent, xdata); ++ + return 0; + } + +@@ -4360,9 +4488,14 @@ int + pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) + { +- PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL); +- STACK_WIND(frame, pl_rmdir_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); ++ int32_t error; ++ ++ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir, ++ pl_rmdir_cbk, loc, xflags, xdata); ++ if (error > 0) { ++ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL); ++ } ++ + return 0; + } + +@@ -4392,6 +4525,19 @@ pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) + { ++ pl_inode_t *pl_inode = (pl_inode_t *)cookie; ++ ++ if (op_ret >= 0) { ++ pthread_mutex_lock(&pl_inode->mutex); ++ ++ /* TODO: can happen pl_inode->links == 0 ? */ ++ if (pl_inode->links >= 0) { ++ pl_inode->links++; ++ } ++ ++ pthread_mutex_unlock(&pl_inode->mutex); ++ } ++ + PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +@@ -4401,9 +4547,18 @@ int + pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { ++ pl_inode_t *pl_inode; ++ ++ pl_inode = pl_inode_get(this, oldloc->inode, NULL); ++ if (pl_inode == NULL) { ++ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, ++ NULL); ++ return 0; ++ } ++ + PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc); +- STACK_WIND(frame, pl_link_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); ++ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0456-add-clean-local-after-grant-lock.patch b/SOURCES/0456-add-clean-local-after-grant-lock.patch new file mode 100644 index 0000000..6b8210b --- /dev/null +++ b/SOURCES/0456-add-clean-local-after-grant-lock.patch @@ -0,0 +1,74 @@ +From c38b38249fdf951565f6501ce8e9a4d01142d43e Mon Sep 17 00:00:00 2001 +From: l17zhou <cynthia.zhou@nokia-sbell.com> +Date: Tue, 3 Dec 2019 07:43:35 +0200 +Subject: [PATCH 456/456] add clean local after grant lock + +Found by the flock test: without a correct ref count on the fd, the +lock will not be correctly released. 
 + +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/23794 +> Fixes: bz#1779089 +> Change-Id: I3e466b17c852eb219c8778e43af8ad670a8449cc +> Signed-off-by: l17zhou <cynthia.zhou@nokia-sbell.com> + +BUG: 1854165 +Change-Id: I3e466b17c852eb219c8778e43af8ad670a8449cc +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/206673 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/locks/src/common.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c +index 0c52853..cddbfa6 100644 +--- a/xlators/features/locks/src/common.c ++++ b/xlators/features/locks/src/common.c +@@ -961,7 +961,7 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; +- ++ pl_local_t *local = NULL; + INIT_LIST_HEAD(&granted_list); + + pthread_mutex_lock(&pl_inode->mutex); +@@ -976,9 +976,9 @@ grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode) + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); +- +- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); +- ++ local = lock->frame->local; ++ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, ++ &lock->user_flock, NULL); + __destroy_lock(lock); + } + +@@ -997,6 +997,7 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + struct list_head granted_list; + posix_lock_t *tmp = NULL; + posix_lock_t *lock = NULL; ++ pl_local_t *local = NULL; + + int ret = -1; + +@@ -1024,9 +1025,9 @@ pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode, + + pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock, + 0, 0, NULL); +- +- STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL); +- ++ local = lock->frame->local; ++ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0, ++ &lock->user_flock, NULL); + __destroy_lock(lock); + } + +-- +1.8.3.1 + diff --git a/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch b/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch new file mode 100644 index 0000000..be9202a --- /dev/null +++ b/SOURCES/0457-cluster-ec-Improve-detection-of-new-heals.patch @@ -0,0 +1,409 @@ +From 3e8b3a2c2c6f83635486035fc8040c87d89813d2 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Thu, 2 Jul 2020 18:08:52 +0200 +Subject: [PATCH 457/465] cluster/ec: Improve detection of new heals + +When EC successfully healed a directory it assumed that other +entries inside that directory could have been created, which could +require additional heal cycles. For this reason, when the heal happened +as part of one index heal iteration, it triggered a new iteration. + +The problem happened when the directory was healthy, so no new entries +were added, but its index entry was not removed for some reason. In +this case self-heal started an endless loop, healing the same directory +continuously and causing high CPU utilization. + +This patch improves detection of new files added to the heal index so +that a new index heal iteration is only triggered if there is new work +to do. 
+ +>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24665/ +>Fixes: #1354 + +Change-Id: I2355742b85fbfa6de758bccc5d2e1a283c82b53f +BUG: 1852736 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208041 +Tested-by: Ashish Pandey <aspandey@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ashish Pandey <aspandey@redhat.com> +--- + xlators/cluster/ec/src/ec-common.c | 2 +- + xlators/cluster/ec/src/ec-heal.c | 58 +++++++++++++++++++++++----------- + xlators/cluster/ec/src/ec-heald.c | 24 ++++++++++---- + xlators/cluster/ec/src/ec-inode-read.c | 27 ++++++++++++++-- + xlators/cluster/ec/src/ec-types.h | 4 +-- + xlators/cluster/ec/src/ec.h | 1 + + 6 files changed, 86 insertions(+), 30 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c +index e580bfb..e3f8769 100644 +--- a/xlators/cluster/ec/src/ec-common.c ++++ b/xlators/cluster/ec/src/ec-common.c +@@ -230,7 +230,7 @@ ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx) + int32_t + ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good, +- uintptr_t bad, dict_t *xdata) ++ uintptr_t bad, uint32_t pending, dict_t *xdata) + { + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL, +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 06a7016..e2de879 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -72,6 +72,7 @@ struct ec_name_data { + char *name; + inode_t *parent; + default_args_cbk_t *replies; ++ uint32_t heal_pending; + }; + + static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL}; +@@ -996,6 +997,7 @@ ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia, + ret = -ENOTCONN; + goto out; + } ++ + out: + if (xattr) + dict_unref(xattr); +@@ -1164,6 +1166,7 @@ ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + dict_t *xdata = NULL; + char *linkname = NULL; + ec_config_t config; ++ + /* There should be just one gfid key */ + EC_REPLIES_ALLOC(replies, ec->nodes); + if (gfid_db->count != 1) { +@@ -1408,6 +1411,11 @@ __ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name, + + ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent, + participants); ++ if (ret >= 0) { ++ /* If ec_create_name() succeeded we return 1 to indicate that a new ++ * file has been created and it will need to be healed. 
*/ ++ ret = 1; ++ } + out: + cluster_replies_wipe(replies, ec->nodes); + loc_wipe(&loc); +@@ -1485,18 +1493,22 @@ ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name, + name_on); + +- if (ret < 0) ++ if (ret < 0) { + memset(name_on, 0, ec->nodes); ++ } else { ++ name_data->heal_pending += ret; ++ } + + for (i = 0; i < ec->nodes; i++) + if (name_data->participants[i] && !name_on[i]) + name_data->failed_on[i] = 1; ++ + return 0; + } + + int + ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, +- unsigned char *participants) ++ unsigned char *participants, uint32_t *pending) + { + int i = 0; + int j = 0; +@@ -1509,7 +1521,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + name_data.frame = frame; + name_data.participants = participants; + name_data.failed_on = alloca0(ec->nodes); +- ; ++ name_data.heal_pending = 0; + + for (i = 0; i < ec->nodes; i++) { + if (!participants[i]) +@@ -1528,6 +1540,8 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + break; + } + } ++ *pending += name_data.heal_pending; ++ + loc_wipe(&loc); + return ret; + } +@@ -1535,7 +1549,7 @@ ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode, + int + __ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + unsigned char *heal_on, unsigned char *sources, +- unsigned char *healed_sinks) ++ unsigned char *healed_sinks, uint32_t *pending) + { + unsigned char *locked_on = NULL; + unsigned char *output = NULL; +@@ -1580,7 +1594,7 @@ unlock: + if (sources[i] || healed_sinks[i]) + participants[i] = 1; + } +- ret = ec_heal_names(frame, ec, inode, participants); ++ ret = ec_heal_names(frame, ec, inode, participants, pending); + + if (EC_COUNT(participants, ec->nodes) <= ec->fragments) + goto out; +@@ -1601,7 +1615,8 @@ out: + + int + ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, +- unsigned char *sources, unsigned char *healed_sinks) ++ unsigned char *sources, unsigned char *healed_sinks, ++ uint32_t *pending) + { + unsigned char *locked_on = NULL; + unsigned char *up_subvols = NULL; +@@ -1632,7 +1647,7 @@ ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode, + goto unlock; + } + ret = __ec_heal_entry(frame, ec, inode, locked_on, sources, +- healed_sinks); ++ healed_sinks, pending); + } + unlock: + cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame, +@@ -1953,14 +1968,14 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state) + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0, + (heal->good | heal->bad), heal->good, heal->bad, +- NULL); ++ 0, NULL); + } + + return EC_STATE_END; + case -EC_STATE_REPORT: + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, fop, fop->xl, -1, fop->error, 0, +- 0, 0, NULL); ++ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1, ++ fop->error, 0, 0, 0, 0, NULL); + } + + return EC_STATE_END; +@@ -1997,14 +2012,15 @@ out: + if (fop != NULL) { + ec_manager(fop, error); + } else { +- func(frame, NULL, this, -1, error, 0, 0, 0, NULL); ++ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL); + } + } + + int32_t + ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, uintptr_t mask, +- uintptr_t good, uintptr_t bad, dict_t *xdata) ++ uintptr_t good, uintptr_t bad, uint32_t pending, ++ dict_t *xdata) + { + ec_fop_data_t *fop = cookie; + ec_heal_t *heal = fop->data; +@@ -2489,6 +2505,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t 
*loc, int32_t partial) + intptr_t mbad = 0; + intptr_t good = 0; + intptr_t bad = 0; ++ uint32_t pending = 0; + ec_fop_data_t *fop = data; + gf_boolean_t blocking = _gf_false; + ec_heal_need_t need_heal = EC_HEAL_NONEED; +@@ -2524,7 +2541,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + if (loc->name && strlen(loc->name)) { + ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name, + participants); +- if (ret == 0) { ++ if (ret >= 0) { + gf_msg_debug(this->name, 0, + "%s: name heal " + "successful on %" PRIXPTR, +@@ -2542,7 +2559,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + + /* Mount triggers heal only when it detects that it must need heal, shd + * triggers heals periodically which need not be thorough*/ +- if (ec->shd.iamshd) { ++ if (ec->shd.iamshd && (ret <= 0)) { + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, + &need_heal); + +@@ -2552,13 +2569,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + goto out; + } + } ++ + sources = alloca0(ec->nodes); + healed_sinks = alloca0(ec->nodes); + if (IA_ISREG(loc->inode->ia_type)) { + ret = ec_heal_data(frame, ec, blocking, loc->inode, sources, + healed_sinks); + } else if (IA_ISDIR(loc->inode->ia_type) && !partial) { +- ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks); ++ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks, ++ &pending); + } else { + ret = 0; + memcpy(sources, participants, ec->nodes); +@@ -2588,10 +2607,11 @@ out: + if (fop->cbks.heal) { + fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno, + ec_char_array_to_mask(participants, ec->nodes), +- mgood & good, mbad & bad, NULL); ++ mgood & good, mbad & bad, pending, NULL); + } + if (frame) + STACK_DESTROY(frame->root); ++ + return; + } + +@@ -2638,8 +2658,8 @@ void + ec_heal_fail(ec_t *ec, ec_fop_data_t *fop) + { + if (fop->cbks.heal) { +- fop->cbks.heal(fop->req_frame, NULL, ec->xl, -1, fop->error, 0, 0, 0, +- NULL); ++ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0, ++ 0, 0, NULL); + } + ec_fop_data_release(fop); + } +@@ -2826,7 +2846,7 @@ fail: + if (fop) + ec_fop_data_release(fop); + if (func) +- func(frame, NULL, this, -1, err, 0, 0, 0, NULL); ++ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL); + } + + int +diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c +index cba111a..4f4b6aa 100644 +--- a/xlators/cluster/ec/src/ec-heald.c ++++ b/xlators/cluster/ec/src/ec-heald.c +@@ -156,15 +156,27 @@ int + ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, + gf_boolean_t full) + { ++ dict_t *xdata = NULL; ++ uint32_t count; + int32_t ret; + +- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); +- if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { ++ ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata); ++ if (!full && (loc->inode->ia_type == IA_IFDIR)) { + /* If we have just healed a directory, it's possible that +- * other index entries have appeared to be healed. We put a +- * mark so that we can check it later and restart a scan +- * without delay. */ +- healer->rerun = _gf_true; ++ * other index entries have appeared to be healed. */ ++ if ((xdata != NULL) && ++ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) && ++ (count > 0)) { ++ /* Force a rerun of the index healer. 
*/ ++ gf_msg_debug(healer->this->name, 0, "%d more entries to heal", ++ count); ++ ++ healer->rerun = _gf_true; ++ } ++ } ++ ++ if (xdata != NULL) { ++ dict_unref(xdata); + } + + return ret; +diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c +index f87a94a..e82e8f6 100644 +--- a/xlators/cluster/ec/src/ec-inode-read.c ++++ b/xlators/cluster/ec/src/ec-inode-read.c +@@ -393,7 +393,8 @@ ec_manager_getxattr(ec_fop_data_t *fop, int32_t state) + int32_t + ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + int32_t op_ret, int32_t op_errno, uintptr_t mask, +- uintptr_t good, uintptr_t bad, dict_t *xdata) ++ uintptr_t good, uintptr_t bad, uint32_t pending, ++ dict_t *xdata) + { + ec_fop_data_t *fop = cookie; + fop_getxattr_cbk_t func = fop->data; +@@ -402,6 +403,25 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + char *str; + char bin1[65], bin2[65]; + ++ /* We try to return the 'pending' information in xdata, but if this cannot ++ * be set, we will ignore it silently. We prefer to report the success or ++ * failure of the heal itself. */ ++ if (xdata == NULL) { ++ xdata = dict_new(); ++ } else { ++ dict_ref(xdata); ++ } ++ if (xdata != NULL) { ++ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) { ++ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc ++ * enforces to check the result in this case. However we don't ++ * really care if it succeeded or not. We'll just do the same. ++ * ++ * This empty 'if' avoids the warning, and it will be removed by ++ * the optimizer. */ ++ } ++ } ++ + if (op_ret >= 0) { + dict = dict_new(); + if (dict == NULL) { +@@ -435,11 +455,14 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, + } + + out: +- func(frame, NULL, xl, op_ret, op_errno, dict, NULL); ++ func(frame, NULL, xl, op_ret, op_errno, dict, xdata); + + if (dict != NULL) { + dict_unref(dict); + } ++ if (xdata != NULL) { ++ dict_unref(xdata); ++ } + + return 0; + } +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index 34a9768..f15429d 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -186,10 +186,10 @@ struct _ec_inode { + + typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, uintptr_t, uintptr_t, uintptr_t, +- dict_t *); ++ uint32_t, dict_t *); + typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t, + int32_t, uintptr_t, uintptr_t, uintptr_t, +- dict_t *); ++ uint32_t, dict_t *); + + union _ec_cbk { + fop_access_cbk_t access; +diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h +index 1b210d9..6f6de6d 100644 +--- a/xlators/cluster/ec/src/ec.h ++++ b/xlators/cluster/ec/src/ec.h +@@ -18,6 +18,7 @@ + #define EC_XATTR_SIZE EC_XATTR_PREFIX "size" + #define EC_XATTR_VERSION EC_XATTR_PREFIX "version" + #define EC_XATTR_HEAL EC_XATTR_PREFIX "heal" ++#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new" + #define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty" + #define EC_STRIPE_CACHE_MAX_SIZE 10 + #define EC_VERSION_SIZE 2 +-- +1.8.3.1 + diff --git a/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch b/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch new file mode 100644 index 0000000..b7b9f04 --- /dev/null +++ b/SOURCES/0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch @@ -0,0 +1,182 @@ +From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001 +From: 
Raghavendra Bhat <raghavendra@redhat.com> +Date: Mon, 15 Apr 2019 14:09:34 -0400 +Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after + cancelling the signer thread + +When bit-rot feature is disabled, the signer thread from the bit-rot-stub +xlator (the thread which performs the setxattr of the signature on to the +disk) is cancelled. But, if the cancelled signer thread had already held +the mutex (&priv->lock) which it uses to monitor the queue of files to +be signed, then the mutex is never released. This creates problems in +future when the feature is enabled again. Both the new instance of the +signer thread and the regular thread which enqueues the files to be +signed will be blocked on this mutex. + +So, as part of cancelling the signer thread, unlock the mutex associated +with it as well using pthread_cleanup_push and pthread_cleanup_pop. + +Upstream patch: + > patch: https://review.gluster.org/22572 + > fixes: #bz1700078 + > Change-Id: Ib761910caed90b268e69794ddeb108165487af40 + +Change-Id: Ib761910caed90b268e69794ddeb108165487af40 +BUG: 1851424 +Signed-off-by: Raghavendra M <raghavendra@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208304 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +- + xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++--- + 2 files changed, 59 insertions(+), 7 deletions(-) + +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +index 7f07f29..155802b 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, + BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, + BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, + BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, +- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); ++ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, ++ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, ++ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL); + + #endif /* !_BITROT_STUB_MESSAGES_H_ */ +diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +index 3f48a4b..c3f81bc 100644 +--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c ++++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c +@@ -26,6 +26,15 @@ + + #define BR_STUB_REQUEST_COOKIE 0x1 + ++void ++br_stub_lock_cleaner(void *arg) ++{ ++ pthread_mutex_t *clean_mutex = arg; ++ ++ pthread_mutex_unlock(clean_mutex); ++ return; ++} ++ + void * + br_stub_signth(void *); + +@@ -166,8 +175,11 @@ init(xlator_t *this) + + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); +- if (ret != 0) ++ if (ret != 0) { ++ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, ++ "failed to create the new thread for signer"); + goto cleanup_lock; ++ } + + ret = br_stub_bad_object_container_init(this, priv); + if (ret) { +@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options) + priv = this->private; + + GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); +- if (priv->do_versioning) { ++ if (priv->do_versioning && !priv->signth) { + ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, + "brssign"); +- if (ret != 0) ++ if (ret 
!= 0) {
++        gf_msg(this->name, GF_LOG_WARNING, 0,
++               BRS_MSG_SPAWN_SIGN_THRD_FAILED,
++               "failed to create the new thread for signer");
+             goto err;
++        }
+ 
+         ret = br_stub_bad_object_container_init(this, priv);
+         if (ret) {
+@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options)
+             gf_msg(this->name, GF_LOG_ERROR, 0,
+                    BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+                    "Could not cancel sign serializer thread");
++        } else {
++            gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
++                   "killed the signer thread");
++            priv->signth = 0;
+         }
+-        priv->signth = 0;
+     }
+ 
+     if (priv->container.thread) {
+@@ -902,6 +921,24 @@ br_stub_signth(void *arg)
+ 
+     THIS = this;
+     while (1) {
++        /*
++         * Disabling the bit-rot feature leads to this particular thread
++         * getting cleaned up by reconfigure via a call to the function
++         * gf_thread_cleanup_xint (which in turn calls pthread_cancel
++         * and pthread_join). But, if this thread had held the mutex
++         * &priv->lock at the time of cancellation, then it leads to a
++         * deadlock in the future when the bit-rot feature is enabled
++         * (which again spawns this thread, which can't hold the lock as
++         * the mutex is still held by the previous instance of the thread
++         * which got killed). Also, the br_stub_handle_object_signature
++         * function, which is called whenever a file has to be signed,
++         * also gets blocked as it too attempts to acquire &priv->lock.
++         *
++         * So, arrange for the lock to be unlocked as part of the
++         * cleanup of this thread using pthread_cleanup_push and
++         * pthread_cleanup_pop.
++         */
++        pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
+         pthread_mutex_lock(&priv->lock);
+         {
+             while (list_empty(&priv->squeue))
+@@ -912,6 +949,7 @@ br_stub_signth(void *arg)
+             list_del_init(&sigstub->list);
+         }
+         pthread_mutex_unlock(&priv->lock);
++        pthread_cleanup_pop(0);
+ 
+         call_resume(sigstub->stub);
+ 
+@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ 
+     priv = this->private;
+ 
+-    if (frame->root->pid != GF_CLIENT_PID_BITD)
++    if (frame->root->pid != GF_CLIENT_PID_BITD) {
++        gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
++               "PID %d from where the signature request "
++               "came does not belong to the bit-rot daemon. "
++               "Unwinding the fop",
++               frame->root->pid);
+         goto dofop;
++    }
+ 
+     ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
+-    if (ret)
++    if (ret) {
++        gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
++               "failed to prepare the signature for %s. Unwinding the fop",
++               uuid_utoa(fd->inode->gfid));
+         goto dofop;
++    }
+     if (fakesuccess) {
+         op_ret = op_errno = 0;
+         goto dofop;
+@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+     /* object signature request */
+     ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
+     if (!ret) {
++        gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
++                     uuid_utoa(fd->inode->gfid));
+         br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
+         goto done;
+     }
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch b/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
new file mode 100644
index 0000000..2c9b66e
--- /dev/null
+++ b/SOURCES/0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch
@@ -0,0 +1,181 @@
+From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
+From: Raghavendra Bhat <raghavendra@redhat.com>
+Date: Mon, 11 Mar 2019 12:16:50 -0400
+Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
+ during oneshot crawl
+
+Currently the bit-rot feature has an issue with disabling and re-enabling
+it on the same volume. Consider enabling bit-rot detection, which goes on
+to crawl and sign all the files present in the volume. Then some files are
+modified, and the bit-rot daemon goes on to sign the modified files with
+the correct signature. Now, disable the bit-rot feature. While signing and
+scrubbing are not happening, the previous checksums of the files continue
+to exist as extended attributes. Now, if some files with checksum xattrs
+get modified, they are not signed with a new signature as the feature is off.
+
+At this point, if the feature is enabled again, the bit-rot daemon will
+go and sign those files which do not have any bit-rot-specific xattrs
+(i.e. those files which were created after bit-rot was disabled). Whereas
+the files with bit-rot xattrs won't get signed with the proper new checksum.
+At this point, if the scrubber runs, it finds the on-disk checksum and the
+actual checksum of the file to be different (because the file got modified)
+and marks the file as corrupted.
+
+FIX:
+
+The fix is to unconditionally sign the files when the bit-rot daemon
+comes up (instead of skipping the files with bit-rot xattrs).
+
+upstream fix:
+ > patch: https://review.gluster.org/#/c/glusterfs/+/22360/
+ > fixes: #bz1700078
+ > Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
+
+Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
+BUG: 1851424
+Signed-off-by: Raghavendra M <raghavendra@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ tests/bitrot/bug-1700078.t                  | 87 +++++++++++++++++++++++++++++
+ xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
+ 2 files changed, 101 insertions(+), 1 deletion(-)
+ create mode 100644 tests/bitrot/bug-1700078.t
+
+diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
+new file mode 100644
+index 0000000..f273742
+--- /dev/null
++++ b/tests/bitrot/bug-1700078.t
+@@ -0,0 +1,87 @@
++#!/bin/bash
++
++. $(dirname $0)/../include.rc
++. $(dirname $0)/../volume.rc
++
++cleanup;
++
++## Start glusterd
++TEST glusterd;
++TEST pidof glusterd;
++
++## Let's create and start the volume
++TEST $CLI volume create $V0 $H0:$B0/${V0}1
++TEST $CLI volume start $V0
++
++## Enable bitrot for volume $V0
++TEST $CLI volume bitrot $V0 enable
++
++## Turn off quick-read so that it won't cache the contents
++# of the file in lookup. For corrupted files, it might
++# end up in reads being served from the cache instead of
++# an error.
++TEST $CLI volume set $V0 performance.quick-read off
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
++
++## Set expiry-timeout to 1 sec
++TEST $CLI volume set $V0 features.expiry-time 1
++
++##Mount $V0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++
++## Turn off the quick-read xlator so that the contents are not served from the
++# quick-read cache.
++TEST $CLI volume set $V0 performance.quick-read off
++
++#Create sample file
++TEST `echo "1234" > $M0/FILE1`
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
++
++##disable bitrot
++TEST $CLI volume bitrot $V0 disable
++
++## modify the file
++TEST `echo "write" >> $M0/FILE1`
++
++# unmount and remount when the file has to be accessed.
++# This is to ensure that, when the remount happens,
++# and the file is read, its contents are served from the
++# brick instead of the cache.
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
++
++##enable bitrot
++TEST $CLI volume bitrot $V0 enable
++
++# expiry time is set to 1 second. Hence sleep for 2 seconds for the
++# oneshot crawler to finish its crawling and sign the file properly.
++sleep 2
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
++
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
++EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
++
++## Ondemand scrub
++TEST $CLI volume bitrot $V0 scrub ondemand
++
++# the scrub ondemand CLI command just ensures that
++# the scrubber has received the ondemand scrub directive
++# and started. Sleep for 2 seconds for the scrubber to finish
++# crawling and marking file(s) as bad (if it finds that
++# corruption has happened), which are filesystem operations.
++sleep 2
++
++TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
++
++##Mount $V0
++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
++
++TEST cat $M0/FILE1
++
++cleanup;
+diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
+index b8feef7..424c0d5 100644
+--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
+@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+     int32_t ret = -1;
+     inode_t *linked_inode = NULL;
+     gf_boolean_t need_signing = _gf_false;
++    gf_boolean_t need_reopen = _gf_true;
+ 
+     GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
+     GF_VALIDATE_OR_GOTO("bit-rot", data, out);
+@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+                uuid_utoa(linked_inode->gfid));
+     } else {
+         need_signing = br_check_object_need_sign(this, xattr, child);
++
++        /*
++         * If we are here, it means the bitrot daemon has started.
++         * Whether it is just a simple restart of the daemon or it was
++         * started because the feature was enabled is hard to determine.
++         * Hence, if need_signing is false (because the bit-rot version
++         * and signature are present), still go ahead and sign it.
++         */
++        if (!need_signing) {
++            need_signing = _gf_true;
++            need_reopen = _gf_true;
++        }
+     }
+ 
+     if (!need_signing)
+@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+     gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
+            "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
+            uuid_utoa(linked_inode->gfid), child->brick_path);
+-    br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
++    br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
+ 
+     ret = 0;
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch b/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
new file mode 100644
index 0000000..e31349a
--- /dev/null
+++ b/SOURCES/0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch
@@ -0,0 +1,152 @@
+From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Thu, 23 Jul 2020 11:07:32 +0530
+Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
+ folder
+
+Problem:
+If a gfid is present in the indices/xattrop folder while
+the file/dir is actually healthy and all the xattrs are healthy,
+it causes a lot of lookups by shd on an entry which does not need
+to be healed.
+This whole process eats up a lot of CPU usage without doing meaningful
+work.
+
+Solution:
+Set the trusted.ec.dirty xattr of the entry so that the actual heal
+process happens and, at the end of it, during the unset of dirty, the
+gfid entry from indices/xattrop will be removed.
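+
+For context, a minimal standalone sketch (not GlusterFS source) of the
+add-array semantics this solution relies on: an xattrop of type
+GF_XATTROP_ADD_ARRAY64 makes the brick add each supplied network-order
+64-bit element to the value already stored in the xattr, so sending a
+delta of 1 for trusted.ec.dirty is an atomic increment, not an overwrite.
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* Portable network-order (big-endian) 64-bit codec. */
+    static uint64_t be64_decode(const unsigned char b[8])
+    {
+        uint64_t v = 0;
+        for (int i = 0; i < 8; i++)
+            v = (v << 8) | b[i];
+        return v;
+    }
+
+    static void be64_encode(uint64_t v, unsigned char b[8])
+    {
+        for (int i = 7; i >= 0; i--) {
+            b[i] = v & 0xff;
+            v >>= 8;
+        }
+    }
+
+    /* One element of an ADD_ARRAY64 merge: on-disk += supplied. */
+    static void add_array64(unsigned char disk[8], const unsigned char req[8])
+    {
+        be64_encode(be64_decode(disk) + be64_decode(req), disk);
+    }
+
+    int main(void)
+    {
+        unsigned char dirty[8] = {0}; /* stands in for trusted.ec.dirty */
+        unsigned char delta[8];
+
+        be64_encode(1, delta); /* the "mark dirty" request */
+        add_array64(dirty, delta);
+        printf("dirty count is now %llu\n",
+               (unsigned long long)be64_decode(dirty));
+        return 0;
+    }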
+ +>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/ +>Fixes: #1385 + +Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b +BUG: 1785714 +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208591 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 73 ++++++++++++++++++++++++++++++++++++++- + xlators/cluster/ec/src/ec-types.h | 7 +++- + 2 files changed, 78 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index e2de879..7d25853 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2488,6 +2488,59 @@ out: + return ret; + } + ++int ++ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) ++{ ++ int i = 0; ++ int ret = 0; ++ dict_t **xattr = NULL; ++ loc_t loc = {0}; ++ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0}; ++ unsigned char *on = NULL; ++ default_args_cbk_t *replies = NULL; ++ dict_t *dict = NULL; ++ ++ /* Allocate the required memory */ ++ loc.inode = inode_ref(inode); ++ gf_uuid_copy(loc.gfid, inode->gfid); ++ on = alloca0(ec->nodes); ++ EC_REPLIES_ALLOC(replies, ec->nodes); ++ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer); ++ if (!xattr) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ dict = dict_new(); ++ if (!dict) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < ec->nodes; i++) { ++ xattr[i] = dict; ++ on[i] = 1; ++ } ++ dirty_xattr[EC_METADATA_TXN] = hton64(1); ++ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, ++ (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); ++ if (ret < 0) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame, ++ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64, ++ xattr, NULL); ++out: ++ if (dict) { ++ dict_unref(dict); ++ } ++ if (xattr) { ++ GF_FREE(xattr); ++ } ++ cluster_replies_wipe(replies, ec->nodes); ++ loc_wipe(&loc); ++ return ret; ++} ++ + void + ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + { +@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false, + &need_heal); + +- if (need_heal == EC_HEAL_NONEED) { ++ if (need_heal == EC_HEAL_PURGE_INDEX) { ++ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, ++ "Index entry needs to be purged for: %s ", ++ uuid_utoa(loc->gfid)); ++ /* We need to send xattrop to set dirty flag so that it can be ++ * healed and index entry could be removed. We need not to take lock ++ * on this entry to do so as we are just setting dirty flag which ++ * actually increases the trusted.ec.dirty count and does not set ++ * the new value. ++ * This will make sure that it is not interfering in other fops.*/ ++ ec_heal_set_dirty_without_lock(frame, ec, loc->inode); ++ } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); + goto out; +@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources, + goto out; + } + } ++ /* If lock count is 0, all dirty flags are 0 and all the ++ * versions are macthing then why are we here. It looks ++ * like something went wrong while removing the index entries ++ * after completing a successful heal or fop. 
In this case ++ * we need to remove this index entry to avoid triggering heal ++ * in a loop and causing lookups again and again*/ ++ *need_heal = EC_HEAL_PURGE_INDEX; + } else { + for (i = 0; i < ec->nodes; i++) { + /* Since each lock can only increment the dirty +diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h +index f15429d..700dc39 100644 +--- a/xlators/cluster/ec/src/ec-types.h ++++ b/xlators/cluster/ec/src/ec-types.h +@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t); + + enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX }; + +-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST }; ++enum _ec_heal_need { ++ EC_HEAL_NONEED, ++ EC_HEAL_MAYBE, ++ EC_HEAL_MUST, ++ EC_HEAL_PURGE_INDEX ++}; + + enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL }; + +-- +1.8.3.1 + diff --git a/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch b/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch new file mode 100644 index 0000000..098be5f --- /dev/null +++ b/SOURCES/0461-geo-replication-Fix-IPv6-parsing.patch @@ -0,0 +1,127 @@ +From d425ed54261d5bc19aa853854cc3b64647e3c897 Mon Sep 17 00:00:00 2001 +From: Aravinda Vishwanathapura <aravinda@kadalu.io> +Date: Sun, 12 Jul 2020 12:42:36 +0530 +Subject: [PATCH 461/465] geo-replication: Fix IPv6 parsing + +Brick paths in Volinfo used `:` as delimiter, Geo-rep uses split +based on `:` char. This will go wrong with IPv6. + +This patch handles the IPv6 case and handles the split properly. +Backport of: + >Upstream Patch: https://review.gluster.org/#/c/glusterfs/+/24706 + >Fixes: #1366 + >Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d + >Signed-off-by: Aravinda Vishwanathapura <aravinda@kadalu.io> + +BUG: 1855966 +Change-Id: I25e88d693744381c0ccf3c1dbf1541b84be2499d +Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/208610 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/master.py | 5 ++-- + geo-replication/syncdaemon/syncdutils.py | 43 +++++++++++++++++++++++++++++--- + 2 files changed, 43 insertions(+), 5 deletions(-) + +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 3f98337..08e98f8 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -26,7 +26,8 @@ from rconf import rconf + from syncdutils import Thread, GsyncdError, escape_space_newline + from syncdutils import unescape_space_newline, gauxpfx, escape + from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid +-from syncdutils import NoStimeAvailable, PartialHistoryAvailable ++from syncdutils import NoStimeAvailable, PartialHistoryAvailable, host_brick_split ++ + + URXTIME = (-1, 0) + +@@ -1466,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon): + node = rconf.args.resource_remote + node_data = node.split("@") + node = node_data[-1] +- remote_node_ip = node.split(":")[0] ++ remote_node_ip, _ = host_brick_split(node) + self.status.set_slave_node(remote_node_ip) + + def changelogs_batch_process(self, changes): +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index 7560fa1..f43e13b 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -883,6 +883,19 @@ class Popen(subprocess.Popen): + self.errfail() + + ++def host_brick_split(value): ++ 
""" ++ IPv6 compatible way to split and get the host ++ and brick information. Example inputs: ++ node1.example.com:/exports/bricks/brick1/brick ++ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick ++ """ ++ parts = value.split(":") ++ brick = parts[-1] ++ hostparts = parts[0:-1] ++ return (":".join(hostparts), brick) ++ ++ + class Volinfo(object): + + def __init__(self, vol, host='localhost', prelude=[], master=True): +@@ -925,7 +938,7 @@ class Volinfo(object): + @memoize + def bricks(self): + def bparse(b): +- host, dirp = b.find("name").text.split(':', 2) ++ host, dirp = host_brick_split(b.find("name").text) + return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text} + return [bparse(b) for b in self.get('brick')] + +@@ -1001,6 +1014,16 @@ class VolinfoFromGconf(object): + def is_hot(self, brickpath): + return False + ++ def is_uuid(self, value): ++ try: ++ uuid.UUID(value) ++ return True ++ except ValueError: ++ return False ++ ++ def possible_path(self, value): ++ return "/" in value ++ + @property + @memoize + def bricks(self): +@@ -1014,8 +1037,22 @@ class VolinfoFromGconf(object): + out = [] + for b in bricks_data: + parts = b.split(":") +- bpath = parts[2] if len(parts) == 3 else "" +- out.append({"host": parts[1], "dir": bpath, "uuid": parts[0]}) ++ b_uuid = None ++ if self.is_uuid(parts[0]): ++ b_uuid = parts[0] ++ # Set all parts except first ++ parts = parts[1:] ++ ++ if self.possible_path(parts[-1]): ++ bpath = parts[-1] ++ # Set all parts except last ++ parts = parts[0:-1] ++ ++ out.append({ ++ "host": ":".join(parts), # if remaining parts are IPv6 name ++ "dir": bpath, ++ "uuid": b_uuid ++ }) + + return out + +-- +1.8.3.1 + diff --git a/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch b/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch new file mode 100644 index 0000000..aa5fd21 --- /dev/null +++ b/SOURCES/0462-Issue-with-gf_fill_iatt_for_dirent.patch @@ -0,0 +1,43 @@ +From f027734165374979bd0bff8ea059dfaadca85e07 Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Thu, 2 Jul 2020 02:07:56 +0530 +Subject: [PATCH 462/465] Issue with gf_fill_iatt_for_dirent + +In "gf_fill_iatt_for_dirent()", while calculating inode_path for loc, +the inode should be of parent's. Instead it is loc.inode which results in error + and eventually lookup/readdirp fails. + +This patch fixes the same. 
+
+This is a backport of the below mainstream fix:
+
+> Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
+> Fixes: #1351
+> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24661/
+
+Change-Id: Ied086234a4634e8cb13520521ac547c87b3c76b5
+BUG: 1853189
+Signed-off-by: Soumya Koduri <skoduri@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/208691
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ libglusterfs/src/gf-dirent.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
+index f289723..3fa67f2 100644
+--- a/libglusterfs/src/gf-dirent.c
++++ b/libglusterfs/src/gf-dirent.c
+@@ -277,7 +277,7 @@ gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
+     gf_uuid_copy(loc.pargfid, parent->gfid);
+     loc.name = entry->d_name;
+     loc.parent = inode_ref(parent);
+-    ret = inode_path(loc.inode, entry->d_name, &path);
++    ret = inode_path(loc.parent, entry->d_name, &path);
+     loc.path = path;
+     if (ret < 0)
+         goto out;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch b/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
new file mode 100644
index 0000000..b47cdd1
--- /dev/null
+++ b/SOURCES/0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch
@@ -0,0 +1,87 @@
+From 7d87933f648092ae55d57a96fd06e3df975d764c Mon Sep 17 00:00:00 2001
+From: Ashish Pandey <aspandey@redhat.com>
+Date: Tue, 18 Aug 2020 10:33:48 +0530
+Subject: [PATCH 463/465] cluster/ec: Change handling of heal failure to avoid
+ crash
+
+Problem:
+ec_getxattr_heal_cbk was called with NULL as the second argument
+when heal was failing. This function was dereferencing the "cookie"
+argument, which caused a crash.
+
+Solution:
+Cookie is changed to carry the value that was supposed to be
+stored in fop->data, so even when fop is NULL in the error case,
+there won't be any NULL dereference.
+
+Thanks to Xavi for the suggestion about the fix.
+
+>Upstream patch: https://review.gluster.org/#/c/glusterfs/+/23050/
+>fixes: bz#1729085
+
+Change-Id: I0798000d5cadb17c3c2fbfa1baf77033ffc2bb8c
+BUG: 1852736
+Reviewed-on: https://code.engineering.redhat.com/gerrit/209012
+Tested-by: Ashish Pandey <aspandey@redhat.com>
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ xlators/cluster/ec/src/ec-heal.c       | 11 ++++++-----
+ xlators/cluster/ec/src/ec-inode-read.c |  4 ++--
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
+index 7d25853..6e6948b 100644
+--- a/xlators/cluster/ec/src/ec-heal.c
++++ b/xlators/cluster/ec/src/ec-heal.c
+@@ -1966,7 +1966,7 @@ ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
+ 
+         case EC_STATE_REPORT:
+             if (fop->cbks.heal) {
+-                fop->cbks.heal(fop->req_frame, fop, fop->xl, 0, 0,
++                fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
+                                (heal->good | heal->bad), heal->good, heal->bad,
+                                0, NULL);
+             }
+@@ -2022,10 +2022,11 @@ ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
+                    uintptr_t good, uintptr_t bad, uint32_t pending,
+                    dict_t *xdata)
+ {
+-    ec_fop_data_t *fop = cookie;
+-    ec_heal_t *heal = fop->data;
++    ec_heal_t *heal = cookie;
+ 
+-    fop->heal = NULL;
++    if (heal->fop) {
++        heal->fop->heal = NULL;
++    }
+     heal->fop = NULL;
+     heal->error = op_ret < 0 ? op_errno : 0;
+     syncbarrier_wake(heal->data);
+@@ -2669,7 +2670,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+ out:
+     ec_reset_entry_healing(fop);
+     if (fop->cbks.heal) {
+-        fop->cbks.heal(fop->req_frame, fop, fop->xl, op_ret, op_errno,
++        fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
+                        ec_char_array_to_mask(participants, ec->nodes),
+                        mgood & good, mbad & bad, pending, NULL);
+     }
+diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
+index e82e8f6..c50d0ad 100644
+--- a/xlators/cluster/ec/src/ec-inode-read.c
++++ b/xlators/cluster/ec/src/ec-inode-read.c
+@@ -396,8 +396,8 @@ ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+                      uintptr_t good, uintptr_t bad, uint32_t pending,
+                      dict_t *xdata)
+ {
+-    ec_fop_data_t *fop = cookie;
+-    fop_getxattr_cbk_t func = fop->data;
++    fop_getxattr_cbk_t func = cookie;
++
+     ec_t *ec = xl->private;
+     dict_t *dict = NULL;
+     char *str;
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch b/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch
new file mode 100644
index 0000000..d98e33d
--- /dev/null
+++ b/SOURCES/0464-storage-posix-Remove-nr_files-usage.patch
@@ -0,0 +1,102 @@
+From 7c51addf7912a94320e6b148bd66f2dbf274c533 Mon Sep 17 00:00:00 2001
+From: Pranith Kumar K <pkarampu@redhat.com>
+Date: Mon, 11 Mar 2019 14:04:39 +0530
+Subject: [PATCH 464/465] storage/posix: Remove nr_files usage
+
+nr_files is supposed to represent the number of files opened in posix.
+The present logic doesn't handle anon fds, because of which the
+counts would always be wrong.
+
+I don't remember anyone using this value while debugging any problem,
+probably because we always have 'ls -l /proc/<pid>/fd', which not only
+prints the fds that are active but also prints their paths. It also covers
+directories and anon fds that actually opened the file. So this code is
+being removed instead of fixing the buggy logic behind nr_files.
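+
+A toy sketch of the drift (hypothetical counters, not the actual xlator
+code): if increments happen only on the explicitly counted open paths
+while every release decrements, an fd created through an uncounted path,
+such as an anonymous fd, leaves the counter permanently off.
+
+    #include <stdio.h>
+
+    static long nr_files = 0;
+
+    static void counted_open(void)   { nr_files++; } /* posix_open-like */
+    static void anonymous_open(void) { /* opened, never counted */ }
+    static void release(void)        { nr_files--; } /* runs for every fd */
+
+    int main(void)
+    {
+        counted_open();
+        anonymous_open();
+        release();
+        release(); /* both fds go away */
+        printf("nr_files = %ld (expected 0)\n", nr_files); /* prints -1 */
+        return 0;
+    }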
+ +> fixes bz#1688106 +> Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +> (Cherry pick from commit f5987d38f216a3142dfe45f03bf66ff4827d9b55) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/22333/) + +Change-Id: Ibf8713fdfdc1ef094e08e6818152637206a54040 +BUG: 1851989 +Signed-off-by: Mohit Agrawal<moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/209468 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-common.c | 2 -- + xlators/storage/posix/src/posix-entry-ops.c | 2 -- + xlators/storage/posix/src/posix-inode-fd-ops.c | 2 -- + xlators/storage/posix/src/posix.h | 1 - + 4 files changed, 7 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index ac53796..b317627 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -128,7 +128,6 @@ posix_priv(xlator_t *this) + gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); + gf_proc_dump_write("max_write", "%" PRId64, + GF_ATOMIC_GET(priv->write_value)); +- gf_proc_dump_write("nr_files", "%" PRId64, GF_ATOMIC_GET(priv->nr_files)); + + return 0; + } +@@ -815,7 +814,6 @@ posix_init(xlator_t *this) + } + + LOCK_INIT(&_private->lock); +- GF_ATOMIC_INIT(_private->nr_files, 0); + GF_ATOMIC_INIT(_private->read_value, 0); + GF_ATOMIC_INIT(_private->write_value, 0); + +diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c +index 65650b3..b3a5381 100644 +--- a/xlators/storage/posix/src/posix-entry-ops.c ++++ b/xlators/storage/posix/src/posix-entry-ops.c +@@ -2243,8 +2243,6 @@ fill_stat: + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + +- GF_ATOMIC_INC(priv->nr_files); +- + op_ret = 0; + + out: +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index d135d8b..81f4a6b 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1605,7 +1605,6 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", real_path, fd); + +- GF_ATOMIC_INC(priv->nr_files); + op_ret = 0; + + out: +@@ -2526,7 +2525,6 @@ posix_release(xlator_t *this, fd_t *fd) + if (!priv) + goto out; + +- GF_ATOMIC_DEC(priv->nr_files); + out: + return 0; + } +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 61495a7..124dbb4 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -154,7 +154,6 @@ struct posix_private { + + gf_atomic_t read_value; /* Total read, from init */ + gf_atomic_t write_value; /* Total write, from init */ +- gf_atomic_t nr_files; + /* + In some cases, two exported volumes may reside on the same + partition on the server. 
Sending statvfs info for both +-- +1.8.3.1 + diff --git a/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch b/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch new file mode 100644 index 0000000..fc22456 --- /dev/null +++ b/SOURCES/0465-posix-Implement-a-janitor-thread-to-close-fd.patch @@ -0,0 +1,384 @@ +From 143b93b230b429cc712353243ed794b68494c040 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Mon, 27 Jul 2020 18:08:00 +0530 +Subject: [PATCH 465/465] posix: Implement a janitor thread to close fd + +Problem: In the commit fb20713b380e1df8d7f9e9df96563be2f9144fd6 we use + syntask to close fd but we have found the patch is reducing the + performance + +Solution: Use janitor thread to close fd's and save the pfd ctx into + ctx janitor list and also save the posix_xlator into pfd object to + avoid the race condition during cleanup in brick_mux environment + +> Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092 +> Fixes: #1396 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/24755/) +> (Cherry pick from commit 41b9616435cbdf671805856e487e373060c9455b + +Change-Id: Ifb3d18a854b267333a3a9e39845bfefb83fbc092 +BUG: 1851989 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/209448 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/glusterfsd.c | 4 ++ + libglusterfs/src/glusterfs/glusterfs.h | 7 ++ + rpc/rpc-lib/src/rpcsvc.c | 6 -- + xlators/storage/posix/src/posix-common.c | 34 +++++++++- + xlators/storage/posix/src/posix-helpers.c | 93 ++++++++++++++++++++++++++ + xlators/storage/posix/src/posix-inode-fd-ops.c | 33 ++++----- + xlators/storage/posix/src/posix.h | 7 ++ + 7 files changed, 161 insertions(+), 23 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 9821180..955bf1d 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1839,6 +1839,10 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + + INIT_LIST_HEAD(&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->volfile_servers); ++ ctx->pxl_count = 0; ++ pthread_mutex_init(&ctx->fd_lock, NULL); ++ pthread_cond_init(&ctx->fd_cond, NULL); ++ INIT_LIST_HEAD(&ctx->janitor_fds); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index 495a4d7..bf6a987 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -733,6 +733,13 @@ struct _glusterfs_ctx { + } stats; + + struct list_head volfile_list; ++ /* Add members to manage janitor threads for cleanup fd */ ++ struct list_head janitor_fds; ++ pthread_cond_t fd_cond; ++ pthread_mutex_t fd_lock; ++ pthread_t janitor; ++ /* The variable is use to save total posix xlator count */ ++ uint32_t pxl_count; + + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; +diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c +index 23ca1fd..3f184bf 100644 +--- a/rpc/rpc-lib/src/rpcsvc.c ++++ b/rpc/rpc-lib/src/rpcsvc.c +@@ -375,12 +375,6 @@ rpcsvc_program_actor(rpcsvc_request_t *req) + + req->ownthread = program->ownthread; + req->synctask = program->synctask; +- if (((req->procnum == GFS3_OP_RELEASE) || +- (req->procnum == GFS3_OP_RELEASEDIR)) && +- (program->prognum == 
GLUSTER_FOP_PROGRAM)) { +- req->ownthread = _gf_false; +- req->synctask = _gf_true; +- } + + err = SUCCESS; + gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s", +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index b317627..c5a43a1 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -150,6 +150,7 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + struct timespec sleep_till = { + 0, + }; ++ glusterfs_ctx_t *ctx = this->ctx; + + switch (event) { + case GF_EVENT_PARENT_UP: { +@@ -160,8 +161,6 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + case GF_EVENT_PARENT_DOWN: { + if (!victim->cleanup_starting) + break; +- gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", +- victim->name); + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); +@@ -187,6 +186,16 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + GF_FREE(priv->janitor); + } + priv->janitor = NULL; ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ while (priv->rel_fdcount > 0) { ++ pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", ++ victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; + default: +@@ -1038,7 +1047,13 @@ posix_init(xlator_t *this) + pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); ++ pthread_cond_init(&_private->fd_cond, NULL); + INIT_LIST_HEAD(&_private->fsyncs); ++ _private->rel_fdcount = 0; ++ ret = posix_spawn_ctx_janitor_thread(this); ++ if (ret) ++ goto out; ++ + ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { +@@ -1133,6 +1148,8 @@ posix_fini(xlator_t *this) + { + struct posix_private *priv = this->private; + gf_boolean_t health_check = _gf_false; ++ glusterfs_ctx_t *ctx = this->ctx; ++ uint32_t count; + int ret = 0; + + if (!priv) +@@ -1166,6 +1183,19 @@ posix_fini(xlator_t *this) + priv->janitor = NULL; + } + ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ count = --ctx->pxl_count; ++ if (count == 0) { ++ pthread_cond_signal(&ctx->fd_cond); ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ if (count == 0) { ++ pthread_join(ctx->janitor, NULL); ++ } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 39dbcce..73a44be 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -1582,6 +1582,99 @@ unlock: + return; + } + ++static struct posix_fd * ++janitor_get_next_fd(glusterfs_ctx_t *ctx) ++{ ++ struct posix_fd *pfd = NULL; ++ ++ while (list_empty(&ctx->janitor_fds)) { ++ if (ctx->pxl_count == 0) { ++ return NULL; ++ } ++ ++ pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); ++ } ++ ++ pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); ++ list_del_init(&pfd->list); ++ ++ return pfd; ++} ++ ++static void ++posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) ++{ ++ THIS = xl; ++ ++ if (pfd->dir == NULL) { ++ gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); ++ sys_close(pfd->fd); ++ } else { ++ gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); ++ sys_closedir(pfd->dir); 
++ } ++ ++ GF_FREE(pfd); ++} ++ ++static void * ++posix_ctx_janitor_thread_proc(void *data) ++{ ++ xlator_t *xl; ++ struct posix_fd *pfd; ++ glusterfs_ctx_t *ctx = NULL; ++ struct posix_private *priv_fd; ++ ++ ctx = data; ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ ++ while ((pfd = janitor_get_next_fd(ctx)) != NULL) { ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ xl = pfd->xl; ++ posix_close_pfd(xl, pfd); ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ ++ priv_fd = xl->private; ++ priv_fd->rel_fdcount--; ++ if (!priv_fd->rel_fdcount) ++ pthread_cond_signal(&priv_fd->fd_cond); ++ } ++ ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ return NULL; ++} ++ ++int ++posix_spawn_ctx_janitor_thread(xlator_t *this) ++{ ++ int ret = 0; ++ glusterfs_ctx_t *ctx = NULL; ++ ++ ctx = this->ctx; ++ ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ if (ctx->pxl_count++ == 0) { ++ ret = gf_thread_create(&ctx->janitor, NULL, ++ posix_ctx_janitor_thread_proc, ctx, ++ "posixctxjan"); ++ ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, ++ "spawning janitor thread failed"); ++ ctx->pxl_count--; ++ } ++ } ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++ ++ return ret; ++} ++ + static int + is_fresh_file(int64_t ctime_sec) + { +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 81f4a6b..21119ea 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -1352,6 +1352,22 @@ out: + return 0; + } + ++static void ++posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) ++{ ++ glusterfs_ctx_t *ctx = this->ctx; ++ struct posix_private *priv = this->private; ++ ++ pfd->xl = this; ++ pthread_mutex_lock(&ctx->fd_lock); ++ { ++ list_add_tail(&pfd->list, &ctx->janitor_fds); ++ priv->rel_fdcount++; ++ pthread_cond_signal(&ctx->fd_cond); ++ } ++ pthread_mutex_unlock(&ctx->fd_lock); ++} ++ + int32_t + posix_releasedir(xlator_t *this, fd_t *fd) + { +@@ -1374,11 +1390,7 @@ posix_releasedir(xlator_t *this, fd_t *fd) + "pfd->dir is NULL for fd=%p", fd); + goto out; + } +- +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); +- +- sys_closedir(pfd->dir); +- GF_FREE(pfd); ++ posix_add_fd_to_cleanup(this, pfd); + + out: + return 0; +@@ -2494,7 +2506,6 @@ out: + int32_t + posix_release(xlator_t *this, fd_t *fd) + { +- struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; +@@ -2502,8 +2513,6 @@ posix_release(xlator_t *this, fd_t *fd) + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + +- priv = this->private; +- + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, +@@ -2517,13 +2526,7 @@ posix_release(xlator_t *this, fd_t *fd) + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + +- gf_msg_debug(this->name, 0, "janitor: closing dir fd=%p", pfd->dir); +- +- sys_close(pfd->fd); +- GF_FREE(pfd); +- +- if (!priv) +- goto out; ++ posix_add_fd_to_cleanup(this, pfd); + + out: + return 0; +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 124dbb4..07f367b 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -134,6 +134,8 @@ struct posix_fd { + off_t dir_eof; /* offset at dir EOF */ + int odirect; + struct list_head list; /* to add to the janitor list */ ++ xlator_t *xl; ++ char _pad[4]; /* manual padding */ + }; + + struct posix_private { +@@ -204,6 +206,7 @@ struct 
posix_private { + pthread_cond_t fsync_cond; + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; ++ pthread_cond_t fd_cond; + int fsync_queue_count; + + enum { +@@ -259,6 +262,7 @@ struct posix_private { + gf_boolean_t fips_mode_rchecksum; + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; ++ uint32_t rel_fdcount; + }; + + typedef struct { +@@ -665,6 +669,9 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, + int + posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + ++int ++posix_spawn_ctx_janitor_thread(xlator_t *this); ++ + void + posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +-- +1.8.3.1 + diff --git a/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch b/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch new file mode 100644 index 0000000..1dc9f57 --- /dev/null +++ b/SOURCES/0466-cluster-ec-Change-stale-index-handling.patch @@ -0,0 +1,68 @@ +From b603170ae5f583037b8177a9d19e56c7821edf0b Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Tue, 25 Aug 2020 04:19:54 +0530 +Subject: [PATCH 466/466] cluster/ec: Change stale index handling + +Problem: +Earlier approach is setting dirty bit which requires extra heal + +Fix: +Send zero-xattrop which deletes stale index without any need +for extra heal. + + > Fixes: #1385 + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24911/ + +BUG: 1785714 +Change-Id: I7e97a1d8b5516f7be47cae55d0e56b14332b6cae +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/209904 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Tested-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 6e6948b..06bafa5 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2490,7 +2490,7 @@ out: + } + + int +-ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) ++ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode) + { + int i = 0; + int ret = 0; +@@ -2520,7 +2520,6 @@ ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode) + xattr[i] = dict; + on[i] = 1; + } +- dirty_xattr[EC_METADATA_TXN] = hton64(1); + ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr, + (sizeof(*dirty_xattr) * EC_VERSION_SIZE)); + if (ret < 0) { +@@ -2621,13 +2620,10 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL, + "Index entry needs to be purged for: %s ", + uuid_utoa(loc->gfid)); +- /* We need to send xattrop to set dirty flag so that it can be +- * healed and index entry could be removed. We need not to take lock +- * on this entry to do so as we are just setting dirty flag which +- * actually increases the trusted.ec.dirty count and does not set +- * the new value. +- * This will make sure that it is not interfering in other fops.*/ +- ec_heal_set_dirty_without_lock(frame, ec, loc->inode); ++ /* We need to send zero-xattrop so that stale index entry could be ++ * removed. We need not take lock on this entry to do so as ++ * xattrop on a brick is atomic. 
*/ ++ ec_heal_purge_stale_index(frame, ec, loc->inode); + } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +-- +1.8.3.1 + diff --git a/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch b/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch new file mode 100644 index 0000000..93bb140 --- /dev/null +++ b/SOURCES/0467-build-Added-dependency-for-glusterfs-selinux.patch @@ -0,0 +1,38 @@ +From 9176ee8f10c3c33f31d00261995ed27e8680934a Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 3 Sep 2020 11:46:38 +0000 +Subject: [PATCH 467/467] build: Added dependency for glusterfs-selinux + +> Fixes: #1442 +> Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/24876/ +> Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97 +> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> + +BUG: 1460657 + +Change-Id: I7d79bceff329db4d525bc8a77ba7ffe41bf53c97 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/210637 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 9def416..ed6bdf3 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -650,6 +650,9 @@ Summary: Clustered file-system server + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-cli%{?_isa} = %{version}-%{release} + Requires: %{name}-libs%{?_isa} = %{version}-%{release} ++%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) ++Requires: glusterfs-selinux >= 0.1.0-2 ++%endif + # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse + Requires: %{name}-fuse%{?_isa} = %{version}-%{release} + # self-heal daemon, rebalance, nfs-server etc. are actually clients +-- +1.8.3.1 + diff --git a/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch b/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch new file mode 100644 index 0000000..b4b5ead --- /dev/null +++ b/SOURCES/0468-build-Update-the-glusterfs-selinux-version.patch @@ -0,0 +1,36 @@ +From 4b72f5e7704d480bac869f7a32ac891898bb994f Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 3 Sep 2020 14:56:27 +0000 +Subject: [PATCH 468/468] build: Update the glusterfs-selinux version + +Updated the glusterfs-selinux version according to +the downstream official version. 
+ +Label: DOWNSTREAM ONLY + +BUG: 1460657 + +Change-Id: I7b8bbf53f71f6f56103042950d8910f0cb63a685 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/210685 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + glusterfs.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index ed6bdf3..30d7162 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -651,7 +651,7 @@ Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: %{name}-cli%{?_isa} = %{version}-%{release} + Requires: %{name}-libs%{?_isa} = %{version}-%{release} + %if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) ) +-Requires: glusterfs-selinux >= 0.1.0-2 ++Requires: glusterfs-selinux >= 1.0-1 + %endif + # some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse + Requires: %{name}-fuse%{?_isa} = %{version}-%{release} +-- +1.8.3.1 + diff --git a/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch b/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch new file mode 100644 index 0000000..0fadfc9 --- /dev/null +++ b/SOURCES/0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch @@ -0,0 +1,33 @@ +From 6fed6cfcb26e6ed3c9640c5f889629315bbd83c2 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Mon, 31 Aug 2020 12:22:05 +0530 +Subject: [PATCH 469/469] cluster/ec: Don't trigger heal for stale index + + > Fixes: #1385 + > Upstream-patch: https://review.gluster.org/c/glusterfs/+/24930 + +BUG: 1785714 +Change-Id: I3609dd2e1f63c4bd6a19d528b935bf5b05443824 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/210731 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/ec/src/ec-heal.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c +index 06bafa5..f6376cd 100644 +--- a/xlators/cluster/ec/src/ec-heal.c ++++ b/xlators/cluster/ec/src/ec-heal.c +@@ -2624,6 +2624,7 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial) + * removed. We need not take lock on this entry to do so as + * xattrop on a brick is atomic. */ + ec_heal_purge_stale_index(frame, ec, loc->inode); ++ goto out; + } else if (need_heal == EC_HEAL_NONEED) { + gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, + "Heal is not required for : %s ", uuid_utoa(loc->gfid)); +-- +1.8.3.1 + diff --git a/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch b/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch new file mode 100644 index 0000000..e26d46a --- /dev/null +++ b/SOURCES/0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch @@ -0,0 +1,63 @@ +From 8e427716f4e2855093b1a1a0e3a9ec79ebac7faf Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Thu, 10 Sep 2020 13:49:09 +0530 +Subject: [PATCH 470/473] extras/snap_scheduler: changes in + gluster-shared-storage mount path + +The patch https://review.gluster.org/#/c/glusterfs/+/24934/, changes mount point +of gluster_shared_storage from /var/run to /run to address the issue of symlink +at mount path in fstab. +NOTE: mount point /var/run is symlink to /run + +The required changes with respect to gluster_shared_storage mount path are +introduced with this patch in snap_scheduler. 
+ +>Fixes: #1476 +>Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/24971/ +BUG: 1873469 +Change-Id: I9ce88c2f624c6aa5982de04edfee2d0a9f160d62 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/211391 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/snap_scheduler/gcron.py | 4 ++-- + extras/snap_scheduler/snap_scheduler.py | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py +index cc16310..0e4df77 100755 +--- a/extras/snap_scheduler/gcron.py ++++ b/extras/snap_scheduler/gcron.py +@@ -19,10 +19,10 @@ import logging.handlers + import fcntl + + +-GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" ++GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" + GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks" + GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag" +-LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/" ++LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/" + log = logging.getLogger("gcron-logger") + start_time = 0.0 + +diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py +index 5a29d41..e8fcc44 100755 +--- a/extras/snap_scheduler/snap_scheduler.py ++++ b/extras/snap_scheduler/snap_scheduler.py +@@ -67,7 +67,7 @@ except ImportError: + SCRIPT_NAME = "snap_scheduler" + scheduler_enabled = False + log = logging.getLogger(SCRIPT_NAME) +-SHARED_STORAGE_DIR="/var/run/gluster/shared_storage" ++SHARED_STORAGE_DIR="/run/gluster/shared_storage" + GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled" + GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled" + GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks" +-- +1.8.3.1 + diff --git a/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch b/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch new file mode 100644 index 0000000..0ebba37 --- /dev/null +++ b/SOURCES/0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch @@ -0,0 +1,73 @@ +From d23ad767281af85cf07f5c3f63de482d40ee1953 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Thu, 10 Sep 2020 13:16:12 +0530 +Subject: [PATCH 471/473] nfs-ganesha: gluster_shared_storage fails to + automount on node reboot on rhel 8 + +The patch https://review.gluster.org/#/c/glusterfs/+/24934/, changes mount point +of gluster_shared_storage from /var/run to /run to address the issue of symlink +at mount path in fstab. +NOTE: mount point /var/run is symlink to /run + +The required changes with respect to gluster_shared_storage mount path are +introduced with this patch in nfs-ganesha. 
+ +>Fixes: #1475 +>Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/24970/ +BUG: 1873469 +Change-Id: I9c7677a053e1291f71476d47ba6fa2e729f59625 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/211392 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/ganesha/ocf/ganesha_nfsd | 2 +- + extras/ganesha/scripts/ganesha-ha.sh | 2 +- + extras/hook-scripts/start/post/S31ganesha-start.sh | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd +index 93fc8be..f91e8b6 100644 +--- a/extras/ganesha/ocf/ganesha_nfsd ++++ b/extras/ganesha/ocf/ganesha_nfsd +@@ -36,7 +36,7 @@ else + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + fi + +-OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" ++OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" + : ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + + ganesha_meta_data() { +diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh +index a6814b1..9790a71 100644 +--- a/extras/ganesha/scripts/ganesha-ha.sh ++++ b/extras/ganesha/scripts/ganesha-ha.sh +@@ -24,7 +24,7 @@ GANESHA_HA_SH=$(realpath $0) + HA_NUM_SERVERS=0 + HA_SERVERS="" + HA_VOL_NAME="gluster_shared_storage" +-HA_VOL_MNT="/var/run/gluster/shared_storage" ++HA_VOL_MNT="/run/gluster/shared_storage" + HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" + SERVICE_MAN="DISTRO_NOT_FOUND" + +diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh +index 90ba6bc..7ad6f23 100755 +--- a/extras/hook-scripts/start/post/S31ganesha-start.sh ++++ b/extras/hook-scripts/start/post/S31ganesha-start.sh +@@ -4,7 +4,7 @@ OPTSPEC="volname:,gd-workdir:" + VOL= + declare -i EXPORT_ID + ganesha_key="ganesha.enable" +-GANESHA_DIR="/var/run/gluster/shared_storage/nfs-ganesha" ++GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha" + CONF1="$GANESHA_DIR/ganesha.conf" + GLUSTERD_WORKDIR= + +-- +1.8.3.1 + diff --git a/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch b/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch new file mode 100644 index 0000000..79d4d0e --- /dev/null +++ b/SOURCES/0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch @@ -0,0 +1,98 @@ +From ccd45222c46b91b4d0cd57db9ea8b1515c97ada0 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Mon, 31 Aug 2020 20:08:39 +0530 +Subject: [PATCH 472/473] geo-rep: gluster_shared_storage fails to automount on + node reboot on rhel 8. + +Issue: On reboot, all the mounts get wiped out. + Only the mounts mentioned in /etc/fstab automatically gets mounted + during boot/reboot. + + But /etc/fstab complains on not getting a canonical path + (it gets path containing a symlink) + This is because the gluster_shared_storage, is mounted to + /var/run which is symlink to /run. This is a general practice + followed by most operating systems. + + [root@ ~]# ls -lsah /var/run + 0 lrwxrwxrwx. 1 root root 6 Jul 22 19:39 /var/run -> ../run + +Fix: Mount gluster_shared_storage on /run. 
+ (Also It is seen that /var/run is mostly + used by old or legacy systems, thus it is a good practice to + update /var/run to /run) + +>fixes: #1459 +>Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> + +backport of https://review.gluster.org/#/c/glusterfs/+/24934/ +BUG: 1873469 +Change-Id: I8c16653be8cd746c84f01abf0eea19284fb97c77 +Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/211387 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../set/post/S32gluster_enable_shared_storage.sh | 18 +++++++++--------- + geo-replication/gsyncd.conf.in | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 885ed03..3bae37c 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -79,9 +79,9 @@ done + + if [ "$option" == "disable" ]; then + # Unmount the volume on all the nodes +- umount /var/run/gluster/shared_storage +- cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp +- mv /var/run/gluster/fstab.tmp /etc/fstab ++ umount /run/gluster/shared_storage ++ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp ++ mv /run/gluster/fstab.tmp /etc/fstab + fi + + if [ "$is_originator" == 1 ]; then +@@ -105,7 +105,7 @@ function check_volume_status() + } + + mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ +- /var/run/gluster/shared_storage" ++ /run/gluster/shared_storage" + + if [ "$option" == "enable" ]; then + retry=0; +@@ -120,10 +120,10 @@ if [ "$option" == "enable" ]; then + status=$(check_volume_status) + done + # Mount the volume on all the nodes +- umount /var/run/gluster/shared_storage +- mkdir -p /var/run/gluster/shared_storage ++ umount /run/gluster/shared_storage ++ mkdir -p /run/gluster/shared_storage + $mount_cmd +- cp /etc/fstab /var/run/gluster/fstab.tmp +- echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp +- mv /var/run/gluster/fstab.tmp /etc/fstab ++ cp /etc/fstab /run/gluster/fstab.tmp ++ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp ++ mv /run/gluster/fstab.tmp /etc/fstab + fi +diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in +index 11e57fd..9688c79 100644 +--- a/geo-replication/gsyncd.conf.in ++++ b/geo-replication/gsyncd.conf.in +@@ -123,7 +123,7 @@ type=bool + help=Use this to set Active Passive mode to meta-volume. 
+ + [meta-volume-mnt] +-value=/var/run/gluster/shared_storage ++value=/run/gluster/shared_storage + help=Meta Volume or Shared Volume mount path + + [allow-network] +-- +1.8.3.1 + diff --git a/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch b/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch new file mode 100644 index 0000000..0629fa7 --- /dev/null +++ b/SOURCES/0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch @@ -0,0 +1,75 @@ +From 80f1b3aedcde02ae25b341519857ba9a5b2fa722 Mon Sep 17 00:00:00 2001 +From: Sheetal Pamecha <spamecha@redhat.com> +Date: Thu, 24 Sep 2020 19:43:29 +0530 +Subject: [PATCH 473/473] glusterd: Fix Add-brick with increasing replica count + failure + +Problem: add-brick operation fails with multiple bricks on same +server error when replica count is increased. + +This was happening because of extra runs in a loop to compare +hostnames and if bricks supplied were less than "replica" count, +the bricks will get compared to itself resulting in above error. + +>Upstream-patch: https://review.gluster.org/#/c/glusterfs/+/25029 +>Fixes: #1508 + +BUG: 1881823 +Change-Id: I8668e964340b7bf59728bb838525d2db062197ed +Signed-off-by: Sheetal Pamecha <spamecha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/213064 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/brick-order-check-add-brick.t | 21 +++++++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-utils.c | 4 ++++ + 2 files changed, 25 insertions(+) + +diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t +index 29f0ed1..0be31da 100644 +--- a/tests/bugs/glusterd/brick-order-check-add-brick.t ++++ b/tests/bugs/glusterd/brick-order-check-add-brick.t +@@ -37,4 +37,25 @@ EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks' + TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force + EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks' + ++TEST $CLI_1 volume stop $V0 ++TEST $CLI_1 volume delete $V0 ++ ++TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1 ++EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks' ++EXPECT 'Created' volinfo_field $V0 'Status' ++ ++TEST $CLI_1 volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++ ++#Add-brick with Increasing replica count ++TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1 ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++ ++#Add-brick with Increasing replica count from same host should fail ++TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3 ++ ++#adding multiple bricks from same host should fail the brick order check ++TEST ! 
$CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9} ++EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks' ++ + cleanup +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 545e688..d25fc8a 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -14908,6 +14908,10 @@ glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + i = 0; + ai_list_tmp1 = cds_list_entry(ai_list->list.next, addrinfo_list_t, list); + ++ if (brick_count < sub_count) { ++ sub_count = brick_count; ++ } ++ + /* Check for bad brick order */ + while (i < brick_count) { + ++i; +-- +1.8.3.1 + diff --git a/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch b/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch new file mode 100644 index 0000000..034a2a2 --- /dev/null +++ b/SOURCES/0474-features-locks-posixlk-clear-lock-should-set-error-a.patch @@ -0,0 +1,49 @@ +From 3612b3a46c33d19bb7d4aee6eb6625d8d903d459 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 17 Jun 2020 10:44:37 +0530 +Subject: [PATCH 474/478] features/locks: posixlk-clear-lock should set error + as EINTR + +Problem: +fuse on receiving interrupt for setlk sends clear-lock "fop" +using virtual-getxattr. At the moment blocked locks which are +cleared return EAGAIN errno as opposed to EINTR errno + +Fix: +Return EINTR errno. + +Upstream: +> Reviewed-on: https://review.gluster.org/24587 +> Updates: #1310 +> Change-Id: I47de0fcaec370b267f2f5f89deeb37e1b9c0ee9b +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1821743 +Change-Id: Id8301ce6e21c009949e88db5904d8b6ecc278f66 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216157 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/locks/src/clear.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c +index 116aed6..ab1eac6 100644 +--- a/xlators/features/locks/src/clear.c ++++ b/xlators/features/locks/src/clear.c +@@ -181,9 +181,9 @@ clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args, + if (plock->blocked) { + bcount++; + pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW, +- &plock->user_flock, -1, EAGAIN, NULL); ++ &plock->user_flock, -1, EINTR, NULL); + +- STACK_UNWIND_STRICT(lk, plock->frame, -1, EAGAIN, ++ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR, + &plock->user_flock, NULL); + + } else { +-- +1.8.3.1 + diff --git a/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch b/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch new file mode 100644 index 0000000..24a62b3 --- /dev/null +++ b/SOURCES/0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch @@ -0,0 +1,46 @@ +From 47d8c316f622850d060af90d1d939528ace5607a Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Thu, 14 Feb 2019 02:01:38 +0100 +Subject: [PATCH 475/478] fuse lock interrupt: fix flock_interrupt.t + +Upstream: +> Reviewed-on: https://review.gluster.org/22213 +> updates: bz#1193929 +> Change-Id: I347de62755100cd69e3cf341434767ae23fd1ba4 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1821743 +Change-Id: I0088f804bca215152e7ca2c490402c11f7b5333a +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: 
https://code.engineering.redhat.com/gerrit/216158 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/features/flock_interrupt.t | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +index 8603b65..964a4bc 100644 +--- a/tests/features/flock_interrupt.t ++++ b/tests/features/flock_interrupt.t +@@ -22,12 +22,12 @@ EXPECT 'Started' volinfo_field $V0 'Status'; + TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + TEST touch $M0/testfile; + +-function flock_interrupt { +- flock $MO/testfile sleep 3 & flock -w 1 $M0/testfile true; +- echo ok; +-} ++echo > got_lock ++flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } & + +-EXPECT_WITHIN 2 ok flock_interrupt; ++EXPECT_WITHIN 4 ok cat got_lock; + + ## Finish up ++sleep 7; ++rm -f got_lock; + cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch b/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch new file mode 100644 index 0000000..6c9d736 --- /dev/null +++ b/SOURCES/0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch @@ -0,0 +1,114 @@ +From 40519185067d891f06818c574301ea1af4b36479 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Wed, 17 Jun 2020 10:45:19 +0530 +Subject: [PATCH 476/478] mount/fuse: use cookies to get fuse-interrupt-record + instead of xdata + +Problem: +On executing tests/features/flock_interrupt.t the following error log +appears +[2020-06-16 11:51:54.631072 +0000] E +[fuse-bridge.c:4791:fuse_setlk_interrupt_handler_cbk] 0-glusterfs-fuse: +interrupt record not found + +This happens because fuse-interrupt-record is never sent on the wire by +getxattr fop and there is no guarantee that in the cbk it will be +available in case of failures. 
+ +Fix: +wind getxattr fop with fuse-interrupt-record as cookie and recover it +in the cbk + +Upstream: +> Reviewed-on: https://review.gluster.org/24588 +> Fixes: #1310 +> Change-Id: I4cfff154321a449114fc26e9440db0f08e5c7daa +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1821743 +Change-Id: If9576801654d4d743bd66ae90ca259c4d34746a7 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216159 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/features/flock_interrupt.t | 1 - + xlators/mount/fuse/src/fuse-bridge.c | 28 +++++++--------------------- + 2 files changed, 7 insertions(+), 22 deletions(-) + +diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t +index 964a4bc..b8717e3 100644 +--- a/tests/features/flock_interrupt.t ++++ b/tests/features/flock_interrupt.t +@@ -28,6 +28,5 @@ flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok + EXPECT_WITHIN 4 ok cat got_lock; + + ## Finish up +-sleep 7; + rm -f got_lock; + cleanup; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index f61fa39..1bddac2 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -4768,16 +4768,8 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + int32_t op_errno, dict_t *dict, dict_t *xdata) + { + fuse_interrupt_state_t intstat = INTERRUPT_NONE; +- fuse_interrupt_record_t *fir; ++ fuse_interrupt_record_t *fir = cookie; + fuse_state_t *state = NULL; +- int ret = 0; +- +- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir); +- if (ret < 0) { +- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found"); +- +- goto out; +- } + + intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; + +@@ -4789,7 +4781,6 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + GF_FREE(state); + } + +-out: + STACK_DESTROY(frame->root); + + return 0; +@@ -4827,9 +4818,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir) + frame->op = GF_FOP_GETXATTR; + state->name = xattr_name; + +- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol, +- state->active_subvol->fops->fgetxattr, state->fd, xattr_name, +- state->xdata); ++ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir, ++ state->active_subvol, ++ state->active_subvol->fops->fgetxattr, state->fd, ++ xattr_name, state->xdata); + + return; + +@@ -4852,15 +4844,9 @@ fuse_setlk_resume(fuse_state_t *state) + fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler); + state_clone = gf_memdup(state, sizeof(*state)); + if (state_clone) { +- /* +- * Calling this allocator with fir casted to (char *) seems like +- * an abuse of this API, but in fact the API is stupid to assume +- * a (char *) argument (in the funcion it's casted to (void *) +- * anyway). 
+- */ +- state_clone->xdata = dict_for_key_value( +- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true); ++ state_clone->xdata = dict_new(); + } ++ + if (!fir || !state_clone || !state_clone->xdata) { + if (fir) { + GF_FREE(fir); +-- +1.8.3.1 + diff --git a/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch b/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch new file mode 100644 index 0000000..c604ccd --- /dev/null +++ b/SOURCES/0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch @@ -0,0 +1,51 @@ +From 3d50207b346cb5d95af94aa010ebd1ec3e795554 Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar <ssivakum@redhat.com> +Date: Wed, 4 Nov 2020 11:44:51 +0530 +Subject: [PATCH 477/478] glusterd/snapshot: Snapshot prevalidation failure not + failing. + +The value of `ret` is to be set to `-1` to indicate failure +or else the prevalidation which is supposed to be a failure +as the snapshot isn't even activated for cloning will move +to next stage. + +Label: DOWNSTREAM ONLY +BUG: 1837926 + +Change-Id: I95122c3a261332630efa00033a1892a8f95fc00b +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/216920 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Shwetha Acharya <sacharya@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-snapshot.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +index 5b8ae97..ee3cea0 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c ++++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c +@@ -2298,8 +2298,8 @@ glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr, + goto out; + } + +- + if (!glusterd_is_volume_started(snap_vol)) { ++ ret = -1; + snprintf(err_str, sizeof(err_str), + "Snapshot %s is " + "not activated", +@@ -9361,7 +9361,8 @@ glusterd_handle_snapshot_fn(rpcsvc_request_t *req) + "for a snapshot"); + op_errno = EG_OPNOTSUP; + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION, +- "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_RHS_3_0); ++ "%s (%d < %d)", err_str, conf->op_version, ++ GD_OP_VERSION_RHS_3_0); + ret = -1; + goto out; + } +-- +1.8.3.1 + diff --git a/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch b/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch new file mode 100644 index 0000000..596fe2b --- /dev/null +++ b/SOURCES/0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch @@ -0,0 +1,119 @@ +From e772bef5631017145cd0270d72a9ada1378e022a Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Fri, 30 Oct 2020 08:27:47 +0200 +Subject: [PATCH 478/478] DHT - Fixing rebalance failure on issuing stop + command + +Issuing a stop command for an ongoing rebalance process results in an error. +This issue was brought up in https://bugzilla.redhat.com/1286171 and a patch +(https://review.gluster.org/24103/) was submitted to resolve the issue. + +However the submitted patch resolved only part of the +problem by reducing the number of log messages that were printed (since +rebalnace is currently a recursive process, an error message was printed +for every directory) but didn't fully resolve the root cause for the +failure. 
+
+This patch fixes the issue by modifying the code-path which handles the
+termination of the rebalance process by issuing a stop command.
+
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/1628
+> fixes: #1627
+> Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03
+> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com
+
+BUG: 1286171
+Change-Id: I604f2b0f8b1ccb1026b8425a14200bbd1dc5bd03
+Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/216896
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/cluster/dht/src/dht-rebalance.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
+index abc10fc..d49a719 100644
+--- a/xlators/cluster/dht/src/dht-rebalance.c
++++ b/xlators/cluster/dht/src/dht-rebalance.c
+@@ -3113,12 +3113,10 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ struct dht_container *tmp_container = NULL;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+- ret = -1;
+ goto out;
+ }
+
+ if (dir_dfmeta->offset_var[i].readdir_done == 1) {
+- ret = 0;
+ goto out;
+ }
+
+@@ -3135,7 +3133,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ &(dir_dfmeta->equeue[i]), xattr_req, NULL);
+ if (ret == 0) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+- ret = 0;
+ goto out;
+ }
+
+@@ -3161,7 +3158,6 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+
+ while (1) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+- ret = -1;
+ goto out;
+ }
+
+@@ -3273,12 +3269,14 @@ int static gf_defrag_get_entry(xlator_t *this, int i,
+ }
+
+ out:
+- if (ret == 0) {
+- *container = tmp_container;
+- } else {
+- if (tmp_container) {
++ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
++ if (ret == 0) {
++ *container = tmp_container;
++ } else {
+ gf_defrag_free_container(tmp_container);
+ }
++ } else {
++ gf_defrag_free_container(tmp_container);
+ }
+
+ return ret;
+@@ -3487,7 +3485,7 @@ gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ migrate_data, dir_dfmeta, xattr_req,
+ &should_commit_hash, perrno);
+
+- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ goto out;
+ }
+
+@@ -3947,7 +3945,7 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
+ migrate_data);
+
+- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ goto out;
+ }
+
+@@ -4015,6 +4013,10 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
+ ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
+
++ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
++ goto out;
++ }
++
+ if (ret && (ret != 2)) {
+ if (perrno == ENOENT || perrno == ESTALE) {
+ ret = 0;
+--
+1.8.3.1
+
diff --git a/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch b/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch
new file mode 100644
index 0000000..8bbdf9d
--- /dev/null
+++ b/SOURCES/0479-ganesha-ha-revised-regex-exprs-for-status.patch
@@ -0,0 +1,53 @@
+From 9036c9f0fd081c83c5c4fcd1ecba858421442777 Mon Sep 17 00:00:00 2001
+From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
+Date: Tue, 10 Nov 2020 07:39:14 -0500
+Subject: [PATCH 479/479] ganesha-ha: revised regex exprs for --status
+
+better whitespace in regex
+
+This has worked for years, but somehow no longer works on rhel8
+
+> Updates: #1000
+> Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4
+> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+> https://github.com/gluster/glusterfs/commit/4026fe9a956238d8e4785cf39c3b7290eae90f03
+
+BUG: 1895301
+Change-Id: I2c1a3537573d125608334772ba1a263c55407dd4
+Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/217480
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+---
+ extras/ganesha/scripts/ganesha-ha.sh | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
+index 9790a71..491c61d 100644
+--- a/extras/ganesha/scripts/ganesha-ha.sh
++++ b/extras/ganesha/scripts/ganesha-ha.sh
+@@ -948,18 +948,18 @@ status()
+ # check if the VIP and port block/unblock RAs are on the expected nodes
+ for n in ${nodes[*]}; do
+
+- grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ grep -E -x "${n}-nfs_block +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+- grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ grep -E -x "${n}-cluster_ip-1 +\(ocf::heartbeat:IPaddr\): +Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+- grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
++ grep -E -x "${n}-nfs_unblock +\(ocf::heartbeat:portblock\): +Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ done
+
+- grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch}
++ grep -E "\): +Stopped|FAILED" > /dev/null 2>&1 ${scratch}
+ result=$?
+
+ if [ ${result} -eq 0 ]; then
+--
+1.8.3.1
+
diff --git a/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch b/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch
new file mode 100644
index 0000000..31c404f
--- /dev/null
+++ b/SOURCES/0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch
@@ -0,0 +1,255 @@
+From 759c12fc016a6399bb179aa0f930602c87d1e0f8 Mon Sep 17 00:00:00 2001
+From: Barak Sason Rofman <bsasonro@redhat.com>
+Date: Tue, 24 Nov 2020 12:56:10 +0200
+Subject: [PATCH 480/480] DHT/Rebalance - Ensure Rebalance reports status only
+ once upon stopping
+
+Upon issuing a rebalance stop command, the status of rebalance is being
+logged twice to the log file, which can sometimes result in
+inconsistent reports (one report states status stopped, while the other
+may report something else).
+This fix ensures rebalance reports its status only once and that the
+correct status is being reported.
+ +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1783 +> fixes: #1782 +> Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37 +> Signed-off-by: Barak Sason Rofman bsasonro@redhat.com + +BUG: 1286171 +Change-Id: Id3206edfad33b3db60e9df8e95a519928dc7cb37 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/218953 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Csaba Henk <chenk@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/distribute/bug-1286171.t | 75 +++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 2 +- + xlators/cluster/dht/src/dht-common.h | 2 +- + xlators/cluster/dht/src/dht-rebalance.c | 63 ++++++++++++++------------- + 4 files changed, 108 insertions(+), 34 deletions(-) + create mode 100644 tests/bugs/distribute/bug-1286171.t + +diff --git a/tests/bugs/distribute/bug-1286171.t b/tests/bugs/distribute/bug-1286171.t +new file mode 100644 +index 0000000..a2ca36f +--- /dev/null ++++ b/tests/bugs/distribute/bug-1286171.t +@@ -0,0 +1,75 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../cluster.rc ++. $(dirname $0)/../../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++volname=bug-1286171 ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $volname $H0:$B0/${volname}{1,2} ++ ++# Verify volume creation ++EXPECT "$volname" volinfo_field $volname 'Volume Name'; ++EXPECT 'Created' volinfo_field $volname 'Status'; ++ ++# Start volume and verify successful start ++TEST $CLI volume start $volname; ++EXPECT 'Started' volinfo_field $volname 'Status'; ++TEST glusterfs --volfile-id=$volname --volfile-server=$H0 --entry-timeout=0 $M0; ++#------------------------------------------------------------ ++ ++# Create a nested dir structure and some file under MP ++cd $M0; ++for i in {1..5} ++do ++ mkdir dir$i ++ cd dir$i ++ for j in {1..5} ++ do ++ mkdir dir$i$j ++ cd dir$i$j ++ for k in {1..5} ++ do ++ mkdir dir$i$j$k ++ cd dir$i$j$k ++ touch {1..300} ++ cd .. ++ done ++ touch {1..300} ++ cd .. ++ done ++ touch {1..300} ++ cd .. ++done ++touch {1..300} ++ ++# Add-brick and start rebalance ++TEST $CLI volume add-brick $volname $H0:$B0/${volname}4; ++TEST $CLI volume rebalance $volname start; ++ ++# Let rebalance run for a while ++sleep 5 ++ ++# Stop rebalance ++TEST $CLI volume rebalance $volname stop; ++ ++# Allow rebalance to stop ++sleep 5 ++ ++# Examine the logfile for errors ++cd /var/log/glusterfs; ++failures=`grep "failures:" ${volname}-rebalance.log | tail -1 | sed 's/.*failures: //; s/,.*//'`; ++ ++TEST [ $failures == 0 ]; ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 23cc80c..4db89df 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -10969,7 +10969,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) +- gf_defrag_status_get(conf, output); ++ gf_defrag_status_get(conf, output, _gf_false); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) +diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h +index 9ec5b51..92f1b89 100644 +--- a/xlators/cluster/dht/src/dht-common.h ++++ b/xlators/cluster/dht/src/dht-common.h +@@ -1252,7 +1252,7 @@ dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata); + + int +-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict); ++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status); + + void + gf_defrag_set_pause_state(gf_tier_conf_t *tier_conf, tier_pause_state_t state); +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index d49a719..16ac16c 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -2720,7 +2720,6 @@ gf_defrag_migrate_single_file(void *opaque) + iatt_ptr = &entry->d_stat; + + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = -1; + goto out; + } + +@@ -3833,7 +3832,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, + list_for_each_entry_safe(entry, tmp, &entries.list, list) + { + if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { +- ret = 1; + goto out; + } + +@@ -4863,7 +4861,7 @@ out: + LOCK(&defrag->lock); + { + status = dict_new(); +- gf_defrag_status_get(conf, status); ++ gf_defrag_status_get(conf, status, _gf_true); + if (ctx && ctx->notify) + ctx->notify(GF_EN_DEFRAG_STATUS, status); + if (status) +@@ -4998,7 +4996,7 @@ out: + } + + int +-gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) ++gf_defrag_status_get(dht_conf_t *conf, dict_t *dict, gf_boolean_t log_status) + { + int ret = 0; + uint64_t files = 0; +@@ -5095,34 +5093,35 @@ gf_defrag_status_get(dht_conf_t *conf, dict_t *dict) + gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left"); + + log: +- switch (defrag->defrag_status) { +- case GF_DEFRAG_STATUS_NOT_STARTED: +- status = "not started"; +- break; +- case GF_DEFRAG_STATUS_STARTED: +- status = "in progress"; +- break; +- case GF_DEFRAG_STATUS_STOPPED: +- status = "stopped"; +- break; +- case GF_DEFRAG_STATUS_COMPLETE: +- status = "completed"; +- break; +- case GF_DEFRAG_STATUS_FAILED: +- status = "failed"; +- break; +- default: +- break; +- } ++ if (log_status) { ++ switch (defrag->defrag_status) { ++ case GF_DEFRAG_STATUS_NOT_STARTED: ++ status = "not started"; ++ break; ++ case GF_DEFRAG_STATUS_STARTED: ++ status = "in progress"; ++ break; ++ case GF_DEFRAG_STATUS_STOPPED: ++ status = "stopped"; ++ break; ++ case GF_DEFRAG_STATUS_COMPLETE: ++ status = "completed"; ++ break; ++ case GF_DEFRAG_STATUS_FAILED: ++ status = "failed"; ++ break; ++ default: ++ break; ++ } + +- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, +- "Rebalance is %s. Time taken is %.2f secs", status, elapsed); +- gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, +- "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64 +- ", failures: %" PRIu64 +- ", skipped: " +- "%" PRIu64, +- files, size, lookup, failures, skipped); ++ gf_msg("DHT", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS, ++ "Rebalance is %s. 
Time taken is %.2f secs " ++ "Files migrated: %" PRIu64 ", size: %" PRIu64 ++ ", lookups: %" PRIu64 ", failures: %" PRIu64 ++ ", skipped: " ++ "%" PRIu64, ++ status, elapsed, files, size, lookup, failures, skipped); ++ } + out: + return 0; + } +@@ -5299,7 +5298,7 @@ gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) + defrag->defrag_status = status; + + if (output) +- gf_defrag_status_get(conf, output); ++ gf_defrag_status_get(conf, output, _gf_false); + ret = 0; + out: + gf_msg_debug("", 0, "Returning %d", ret); +-- +1.8.3.1 + diff --git a/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch new file mode 100644 index 0000000..dd9b0ab --- /dev/null +++ b/SOURCES/0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch @@ -0,0 +1,33 @@ +From 346aa7cbc34b9bbbaca45180215a4d9ffd5055df Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Fri, 19 Feb 2021 06:19:07 +0000 +Subject: [PATCH 481/481] RHGS-3.5.3 rebuild to ship with RHEL. + +Label: DOWNSTREAM ONLY +BUG: 1930561 + +Change-Id: I9c7f30cc6bc616344b27072bfde056c7bba1e143 +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228413 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfs.spec.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 30d7162..52f9b40 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1983,6 +1983,8 @@ fi + %endif + + %changelog ++* Fri Feb 19 2021 Rinku Kothiya <rkothiya@redhat.com> ++- Build RGHS clients for RHEL (#1930561) + + * Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com> + - added requires policycoreutils-python-utils on rhel8 for geo-replication +-- +1.8.3.1 + diff --git a/SOURCES/0482-logger-Always-print-errors-in-english.patch b/SOURCES/0482-logger-Always-print-errors-in-english.patch new file mode 100644 index 0000000..e454bec --- /dev/null +++ b/SOURCES/0482-logger-Always-print-errors-in-english.patch @@ -0,0 +1,49 @@ +From e43af5b15d14e43c3201fd0fb7bf02663e3e0127 Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Sat, 7 Nov 2020 12:09:36 +0530 +Subject: [PATCH 482/511] logger: Always print errors in english + +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1657 +> fixes: #1302 +> Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d +> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> + +BUG: 1896425 + +Change-Id: If0e21f016155276a953c64a8dd13ff3eb281d09d +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/219999 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/logging.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c +index 7f0eff6..5874c34 100644 +--- a/libglusterfs/src/logging.c ++++ b/libglusterfs/src/logging.c +@@ -513,6 +513,7 @@ gf_openlog(const char *ident, int option, int facility) + { + int _option = option; + int _facility = facility; ++ char *language = NULL; + + if (-1 == _option) { + _option = LOG_PID | LOG_NDELAY; +@@ -522,7 +523,10 @@ gf_openlog(const char *ident, int option, int facility) + } + + /* TODO: Should check for errors here and return appropriately */ +- setlocale(LC_ALL, ""); ++ language = setlocale(LC_ALL, 
"en_US.UTF-8"); ++ if (!language) ++ setlocale(LC_ALL, ""); ++ + setlocale(LC_NUMERIC, "C"); /* C-locale for strtod, ... */ + /* close the previous syslog if open as we are changing settings */ + closelog(); +-- +1.8.3.1 + diff --git a/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch b/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch new file mode 100644 index 0000000..c0f2118 --- /dev/null +++ b/SOURCES/0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch @@ -0,0 +1,150 @@ +From 8c366f34a279a5ab2a6301bfd93534fe746a23e8 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Mon, 7 Dec 2020 09:53:27 +0530 +Subject: [PATCH 483/511] afr: more quorum checks in lookup and new entry + marking + +Problem: See upstream github issue for details. + +Fix: +-In lookup if the entry exists in 2 out of 3 bricks, don't fail the +lookup with ENOENT just because there is an entrylk on the parent. +Consider quorum before deciding. + +-If entry FOP does not succeed on quorum no. of bricks, do not perform +new entry mark. + +Upstream patch details: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24499/ +> Fixes: #1303 +> Change-Id: I56df8c89ad53b29fa450c7930a7b7ccec9f4a6c5 +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +BUG: 1821599 +Change-Id: If513e8a7d6088a676288927630d8e616269bf5d5 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220363 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + ...20-mark-dirty-for-entry-txn-on-quorum-failure.t | 2 -- + xlators/cluster/afr/src/afr-common.c | 24 ++++++++++++---------- + xlators/cluster/afr/src/afr-dir-write.c | 8 ++++++++ + xlators/cluster/afr/src/afr.h | 4 ++++ + 4 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +index 26f9049..49c4dea 100644 +--- a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t ++++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t +@@ -53,8 +53,6 @@ TEST ! 
ls $B0/${V0}1/file$i + TEST ls $B0/${V0}2/file$i + dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2) + TEST [ "$dirty" != "000000000000000000000000" ] +-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2/file$i +-EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2/file$i + + TEST $CLI volume set $V0 self-heal-daemon on + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 89e2483..851ccad 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -1236,7 +1236,7 @@ refresh_done: + return 0; + } + +-static void ++void + afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies) + { +@@ -2290,6 +2290,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + 0, + }; + gf_boolean_t locked_entry = _gf_false; ++ gf_boolean_t in_flight_create = _gf_false; + gf_boolean_t can_interpret = _gf_true; + inode_t *parent = NULL; + ia_type_t ia_type = IA_INVAL; +@@ -2333,17 +2334,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + if (!replies[i].valid) + continue; + +- if (locked_entry && replies[i].op_ret == -1 && +- replies[i].op_errno == ENOENT) { +- /* Second, check entry is still +- "underway" in creation */ +- local->op_ret = -1; +- local->op_errno = ENOENT; +- goto error; +- } +- +- if (replies[i].op_ret == -1) ++ if (replies[i].op_ret == -1) { ++ if (locked_entry && replies[i].op_errno == ENOENT) { ++ in_flight_create = _gf_true; ++ } + continue; ++ } + + if (read_subvol == -1 || !readable[read_subvol]) { + read_subvol = i; +@@ -2353,6 +2349,12 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + } + } + ++ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) { ++ local->op_ret = -1; ++ local->op_errno = ENOENT; ++ goto error; ++ } ++ + if (read_subvol == -1) + goto error; + /* We now have a read_subvol, which is readable[] (if there +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 84e2a34..416c19d 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -349,6 +349,7 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + int pre_op_count = 0; + int failed_count = 0; ++ unsigned char *success_replies = NULL; + + local = frame->local; + priv = this->private; +@@ -364,9 +365,16 @@ afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this) + failed_count = AFR_COUNT(local->transaction.failed_subvols, + priv->child_count); + ++ /* FOP succeeded on all bricks. */ + if (pre_op_count == priv->child_count && !failed_count) + return; + ++ /* FOP did not suceed on quorum no. of bricks. 
*/
++ success_replies = alloca0(priv->child_count);
++ afr_fill_success_replies(local, priv, success_replies);
++ if (!afr_has_quorum(success_replies, this, NULL))
++ return;
++
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
+index ff96246..ed5096e 100644
+--- a/xlators/cluster/afr/src/afr.h
++++ b/xlators/cluster/afr/src/afr.h
+@@ -1334,4 +1334,8 @@ afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+
+ void
+ afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
++
++void
++afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
++ unsigned char *replies);
+ #endif /* __AFR_H__ */
+--
+1.8.3.1
+
diff --git a/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch b/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
new file mode 100644
index 0000000..56d4feb
--- /dev/null
+++ b/SOURCES/0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch
@@ -0,0 +1,90 @@
+From 6c3b21ce5bb76b35856a6c270eb65d11f869061f Mon Sep 17 00:00:00 2001
+From: Sanju Rakonde <srakonde@redhat.com>
+Date: Fri, 26 Jun 2020 12:10:31 +0530
+Subject: [PATCH 484/511] glusterd: rebalance status displays stats as 0 after
+ reboot
+
+Problem: while the rebalance is in progress, if a node is
+rebooted, rebalance v status shows the stats of this node as
+0 once the node is back.
+
+Reason: when the node is rebooted, once it is back,
+glusterd_volume_defrag_restart() starts the rebalance and
+creates the rpc. But due to some race, the rebalance process is
+sending a disconnect event, so the rpc object is getting destroyed. As
+the rpc object is null, the request for fetching the latest stats is
+not sent to the rebalance process, and stats are shown as default
+values, which is 0.
+
+Solution: When the rpc object is null, we should create the rpc if the
+rebalance process is up, so that the request can be sent to the
+rebalance process using the rpc.
+
+>fixes: #1339
+>Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
+>Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
+Upstream Patch : https://review.gluster.org/c/glusterfs/+/24641
+
+BUG: 1832306
+Change-Id: I1c7533fedd17dcaffc0f7a5a918c87356133a81c
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220369
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-syncop.c | 29 ++++++++++++++++++++---------
+ 1 file changed, 20 insertions(+), 9 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+index c78983a..df78fef 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
+@@ -1693,6 +1693,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ rpc_clnt_t *rpc = NULL;
+ dict_t *rsp_dict = NULL;
+ int32_t cmd = GF_OP_CMD_NONE;
++ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ rsp_dict = dict_new();
+@@ -1724,18 +1725,28 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ cds_list_for_each_entry_safe(pending_node, tmp, &selected, list)
+ {
+ rpc = glusterd_pending_node_get_rpc(pending_node);
++ /* In the case of rebalance if the rpc object is null, we try to
++ * create the rpc object. if the rebalance daemon is down, it returns
++ * -1. otherwise, rpc object will be created and referenced.
++ */
+ if (!rpc) {
+- if (pending_node->type == GD_NODE_REBALANCE) {
+- ret = 0;
+- glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
++ if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) {
++ volinfo = pending_node->node;
++ ret = glusterd_rebalance_rpc_create(volinfo);
++ if (ret) {
++ ret = 0;
++ glusterd_defrag_volume_node_rsp(req_dict, NULL, op_ctx);
++ goto out;
++ } else {
++ rpc = glusterd_defrag_rpc_get(volinfo->rebal.defrag);
++ }
++ } else {
++ ret = -1;
++ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
++ "Brick Op failed "
++ "due to rpc failure.");
+ goto out;
+ }
+-
+- ret = -1;
+- gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+- "Brick Op failed "
+- "due to rpc failure.");
+- goto out;
+ }
+
+ /* Redirect operation to be detach tier via rebalance flow. */
+--
+1.8.3.1
+
diff --git a/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch b/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
new file mode 100644
index 0000000..6ed4f1c
--- /dev/null
+++ b/SOURCES/0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch
@@ -0,0 +1,87 @@
+From 2e6a5e504e66bc95208420e4882e453a53ac9ea2 Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Mon, 2 Nov 2020 11:18:01 +0530
+Subject: [PATCH 485/511] cli-rpc: conditional init of global quota rpc (#1578)
+
+Issue: It is seen that the initialization of rpc to
+connect with quotad is done in every glusterfs cli command,
+irrespective of whether the quota feature is enabled or disabled.
+This seems to be an overkill.
+
+Code change: The presence of the file /var/run/quotad/quotad.pid
+signals that quotad is enabled. Hence we can put a conditional
+check for seeing when this file exists and if it doesn't, we
+just skip over the initialization of the global quotad rpc.
+
+This will go on to reduce the extra rpc calls and operations
+being performed in the kernel space.
+
+>Fixes: #1577
+>Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff
+>Signed-off-by: srijan-sivakumar <ssivakumar@redhat.com>
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1578
+
+BUG: 1885966
+Change-Id: Icb69d35330f76ce95626f59af75a12726eb620ff
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220371
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli.c | 18 +++++++++++++-----
+ cli/src/cli.h | 3 +++
+ 2 files changed, 16 insertions(+), 5 deletions(-)
+
+diff --git a/cli/src/cli.c b/cli/src/cli.c
+index 99a16a0..a76c5a2 100644
+--- a/cli/src/cli.c
++++ b/cli/src/cli.c
+@@ -64,8 +64,7 @@
+ extern int connected;
+ /* using argp for command line parsing */
+
+-const char *argp_program_version =
+- PACKAGE_NAME" "PACKAGE_VERSION;
++const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION;
+ const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+ struct rpc_clnt *global_quotad_rpc;
+@@ -840,9 +839,18 @@ main(int argc, char *argv[])
+ if (!global_rpc)
+ goto out;
+
+- global_quotad_rpc = cli_quotad_clnt_rpc_init();
+- if (!global_quotad_rpc)
+- goto out;
++ /*
++ * Now, one doesn't need to initialize global rpc
++ * for quota unless and until quota is enabled.
++ * So why not put a check to save all the rpc related
++ * ops here.
++ */
++ ret = sys_access(QUOTAD_PID_PATH, F_OK);
++ if (!ret) {
++ global_quotad_rpc = cli_quotad_clnt_rpc_init();
++ if (!global_quotad_rpc)
++ goto out;
++ }
+
+ ret = cli_cmds_register(&state);
+ if (ret)
+diff --git a/cli/src/cli.h b/cli/src/cli.h
+index 37e4d9d..c30ae9c 100644
+--- a/cli/src/cli.h
++++ b/cli/src/cli.h
+@@ -30,6 +30,9 @@
+ #define CLI_TAB_LENGTH 8
+ #define CLI_BRICK_STATUS_LINE_LEN 78
+
++// Quotad pid path.
++#define QUOTAD_PID_PATH "/var/run/gluster/quotad/quotad.pid"
++
+ /* Geo-rep command positional arguments' index */
+ #define GEO_REP_CMD_INDEX 1
+ #define GEO_REP_CMD_CONFIG_INDEX 4
+--
+1.8.3.1
+
diff --git a/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch b/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
new file mode 100644
index 0000000..60750db
--- /dev/null
+++ b/SOURCES/0486-glusterd-brick-sock-file-deleted-log-error-1560.patch
@@ -0,0 +1,87 @@
+From 9b19d4841fc3002d30ec3e44c85ec37682c11bfb Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Thu, 22 Oct 2020 13:07:09 +0530
+Subject: [PATCH 486/511] glusterd: brick sock file deleted, log error (#1560)
+
+Issue: The status of the brick as tracked by glusterd is
+stopped if the socket file corresponding to a running
+brick process is absent in /var/run/gluster. The glusterd
+keeps on trying to reconnect ( rpc layer ) but it fails.
+
+Code change: Rather than registering the rpc connection
+with the help of the given sockfilepath, which is not
+even present as it keeps on reconnecting, why not log
+this as an error and not try to reconnect using the
+non-existing sock file path.
+
+>Fixes: #1526
+>Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757
+>Signed-off-by: srijan-sivakumar <ssivakumar@redhat.com>
+Upstream Patch : https://github.com/gluster/glusterfs/pull/1560
+
+BUG: 1882923
+Change-Id: I6c81691ab1624c66dec74f5ffcc6c383201ac757
+Signed-off-by: Srijan Sivakumar <ssivakum@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/220376
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 27 +++++++++++++++++++++++++--
+ 1 file changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index d25fc8a..a72c494 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -6310,7 +6310,7 @@ find_compatible_brick(glusterd_conf_t *conf, glusterd_volinfo_t *volinfo,
+ check if passed pid is match with running glusterfs process
+ */
+
+-int
++static int
+ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
+ {
+ char fname[128] = "";
+@@ -6383,7 +6383,17 @@ glusterd_get_sock_from_brick_pid(int pid, char *sockpath, size_t len)
+
+ if (tmpsockpath[0]) {
+ strncpy(sockpath, tmpsockpath, i);
+- ret = 0;
++ /*
++ * Condition to check if the brick socket file is present
++ * in the stated path or not. This helps in preventing
++ * constant re-connect triggered in the RPC layer and also
++ * a log message would help out the user.
++ */ ++ ret = sys_access(sockpath, F_OK); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND, ++ "%s not found", sockpath, NULL); ++ } + } + + return ret; +@@ -6581,7 +6591,20 @@ glusterd_brick_start(glusterd_volinfo_t *volinfo, + if (!is_brick_mx_enabled()) { + glusterd_set_brick_socket_filepath( + volinfo, brickinfo, socketpath, sizeof(socketpath)); ++ /* ++ * Condition to check if the brick socket file is present ++ * in the stated path or not. This helps in preventing ++ * constant re-connect triggered in the RPC layer and also ++ * a log message would help out the user. ++ */ ++ ret = sys_access(socketpath, F_OK); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND, ++ "%s not found", socketpath, NULL); ++ goto out; ++ } + } ++ + gf_log(this->name, GF_LOG_DEBUG, + "Using %s as sockfile for brick %s of volume %s ", + socketpath, brickinfo->path, volinfo->volname); +-- +1.8.3.1 + diff --git a/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch b/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch new file mode 100644 index 0000000..ac0d1cc --- /dev/null +++ b/SOURCES/0487-Events-Log-file-not-re-opened-after-logrotate.patch @@ -0,0 +1,56 @@ +From c961ee1d7c1abb2552b79ed39ed7fd1bd1b3962f Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar <ssivakum@redhat.com> +Date: Fri, 7 Aug 2020 15:02:07 +0530 +Subject: [PATCH 487/511] Events: Log file not re-opened after logrotate. + +Issue: The logging is being done in the same file +even after the logrotate utility has changed the file. +This causes the logfile to grow indefinitely. + +Code Changes: Using the WatchedFileHandler class instead +of FileHandler class. This watches the file it is logging +into and if the file changes, it is closed and reopened +using the file name. Hence after file rotate, a new file +will be used for logging instead of continuing with +the same old file. 
+ +>Fixes: #1289 +>Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2 +>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Upstream Patch : https://review.gluster.org/c/glusterfs/+/24820 + +BUG: 1814744 +Change-Id: I773d04f17613a03709cb682692efb39fd8e664e2 +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220370 +Reviewed-by: Shwetha Acharya <sacharya@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + events/src/utils.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/events/src/utils.py b/events/src/utils.py +index 38b707a..6d4e079 100644 +--- a/events/src/utils.py ++++ b/events/src/utils.py +@@ -13,6 +13,7 @@ import sys + import json + import os + import logging ++import logging.handlers + import fcntl + from errno import EBADF + from threading import Thread +@@ -98,7 +99,7 @@ def setup_logger(): + logger.setLevel(logging.INFO) + + # create the logging file handler +- fh = logging.FileHandler(LOG_FILE) ++ fh = logging.handlers.WatchedFileHandler(LOG_FILE) + + formatter = logging.Formatter("[%(asctime)s] %(levelname)s " + "[%(module)s - %(lineno)s:%(funcName)s] " +-- +1.8.3.1 + diff --git a/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch b/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch new file mode 100644 index 0000000..310bc53 --- /dev/null +++ b/SOURCES/0488-glusterd-afr-enable-granular-entry-heal-by-default.patch @@ -0,0 +1,864 @@ +From 0502383024cbf7e4776816e0a992dccc484a3cf2 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Tue, 8 Dec 2020 17:23:22 +0530 +Subject: [PATCH 488/511] glusterd/afr: enable granular-entry-heal by default + +XXXXXXXXXXXXXXXXXXX + IMPORTANT: +XXXXXXXXXXXXXXXXXXXX +I see that for rhgs-3.5.3, GD_OP_VERSION_MAX is GD_OP_VERSION_7_0. Since +this patch should only act on new volumes in rhgs-3.5.4, I am bumping +the op-version to GD_OP_VERSION_7_1. In glusterfs upstream, the patch +acts only if op-version >= GD_OP_VERSION_9_0 as seen in the commit +messae below. + +Upstream patch details: +/------------------------------------------------------------------------------/ +1. The option has been enabled and tested for quite some time now in RHHI-V +downstream and I think it is safe to make it 'on' by default. Since it +is not possible to simply change it from 'off' to 'on' without breaking +rolling upgrades, old clients etc., I have made it default only for new volumes +starting from op-verison GD_OP_VERSION_9_0. + +Note: If you do a volume reset, the option will be turned back off. +This is okay as the dir's gfid will be captured in 'xattrop' folder and heals +will proceed. There might be stale entries inside entry-changes' folder, +which will be removed when we enable the option again. + +2. I encountered a cust. issue where entry heal was pending on a dir. with +236436 files in it and the glustershd.log output was just stuck at +"performing entry selfheal", so I have added logs to give us +more info in DEBUG level about whether entry heal and data heal are +progressing (metadata heal doesn't take much time). That way, we have a +quick visual indication to say things are not 'stuck' if we briefly +enable debug logs, instead of taking statedumps or checking profile info +etc. 
+ +>Fixes: #1483 +>Change-Id: I4f116f8c92f8cd33f209b758ff14f3c7e1981422 +>Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Upstream Patch: https://github.com/gluster/glusterfs/pull/1621 +/------------------------------------------------------------------------------/ + +BUG: 1890506 +Change-Id: If449a1e873633616cfc508d74b5c22eb434b55ae +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220555 +Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/globals.h | 4 +- + libglusterfs/src/syncop-utils.c | 4 +- + tests/basic/afr/add-brick-self-heal-non-granular.t | 75 +++++++++++++ + tests/basic/afr/add-brick-self-heal.t | 4 +- + tests/basic/afr/bug-1130892-non-granular.t | 77 ++++++++++++++ + .../basic/afr/bug-1493415-gfid-heal-non-granular.t | 79 ++++++++++++++ + ...507-type-mismatch-error-handling-non-granular.t | 117 +++++++++++++++++++++ + ...1749322-entry-heal-not-happening-non-granular.t | 90 ++++++++++++++++ + .../afr/replace-brick-self-heal-non-granular.t | 65 ++++++++++++ + tests/basic/afr/replace-brick-self-heal.t | 2 +- + tests/bugs/replicate/bug-1130892.t | 2 +- + tests/bugs/replicate/bug-1493415-gfid-heal.t | 2 +- + .../bug-1722507-type-mismatch-error-handling.t | 26 +++-- + .../bug-1749322-entry-heal-not-happening.t | 7 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 5 + + xlators/cluster/afr/src/afr-self-heal-data.c | 3 + + xlators/cluster/afr/src/afr-self-heal-entry.c | 7 +- + xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++ + 18 files changed, 558 insertions(+), 24 deletions(-) + create mode 100644 tests/basic/afr/add-brick-self-heal-non-granular.t + create mode 100644 tests/basic/afr/bug-1130892-non-granular.t + create mode 100644 tests/basic/afr/bug-1493415-gfid-heal-non-granular.t + create mode 100644 tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t + create mode 100644 tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t + create mode 100644 tests/basic/afr/replace-brick-self-heal-non-granular.t + +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index 31717ed..cc145cd 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -50,7 +50,7 @@ + 1 /* MIN is the fresh start op-version, mostly \ + should not change */ + #define GD_OP_VERSION_MAX \ +- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \ ++ GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \ + count in VME table, should \ + keep changing with \ + introduction of newer \ +@@ -138,6 +138,8 @@ + + #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */ + ++#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */ ++ + #include "glusterfs/xlator.h" + #include "glusterfs/options.h" + +diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c +index be03527..2269c76 100644 +--- a/libglusterfs/src/syncop-utils.c ++++ b/libglusterfs/src/syncop-utils.c +@@ -495,9 +495,7 @@ syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data, + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + +- ret = fn(subvol, entry, loc, data); +- if (ret) +- break; ++ ret |= fn(subvol, entry, loc, data); + } + gf_dirent_free(&entries); + if (ret) +diff --git a/tests/basic/afr/add-brick-self-heal-non-granular.t b/tests/basic/afr/add-brick-self-heal-non-granular.t +new 
file mode 100644 +index 0000000..19caf24 +--- /dev/null ++++ b/tests/basic/afr/add-brick-self-heal-non-granular.t +@@ -0,0 +1,75 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status'; ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++ ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++ ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++for i in {1..5} ++do ++ echo $i > $M0/file$i.txt ++done ++ ++# Metadata changes ++TEST setfattr -n user.test -v qwerty $M0/file5.txt ++ ++# Add brick1 ++TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++ ++# New-brick should accuse the old-bricks (Simulating case for data-loss) ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/ ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/ ++ ++# Check if pending xattr and dirty-xattr are set for newly-added-brick ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++ ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++ ++# Wait for heal to complete ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Check if entry-heal has happened ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort) ++TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort) ++ ++# Test if data was healed ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt ++ ++# Test if metadata was healed and exists on both the bricks ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt ++ ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2 ++ ++cleanup; +diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t +index c847e22..7ebf4f6 100644 +--- a/tests/basic/afr/add-brick-self-heal.t ++++ b/tests/basic/afr/add-brick-self-heal.t +@@ -38,8 +38,8 @@ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0 + TEST setfattr -n 
trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/ + + # Check if pending xattr and dirty-xattr are set for newly-added-brick +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1 + EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2 + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +diff --git a/tests/basic/afr/bug-1130892-non-granular.t b/tests/basic/afr/bug-1130892-non-granular.t +new file mode 100644 +index 0000000..3cdbc7d +--- /dev/null ++++ b/tests/basic/afr/bug-1130892-non-granular.t +@@ -0,0 +1,77 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume info; ++ ++# Create a 1X2 replica ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1} ++EXPECT 'Created' volinfo_field $V0 'Status'; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++ ++# Disable self-heal daemon ++TEST gluster volume set $V0 self-heal-daemon off ++ ++# Enable Client side heal ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++ ++# Disable all perf-xlators ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++ ++# Volume start ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++# FUSE Mount ++TEST ${GFS} -s $H0 --volfile-id $V0 $M0 ++ ++# Create files and dirs ++TEST mkdir -p $M0/one/two/ ++TEST `echo "Carpe diem" > $M0/one/two/three` ++ ++# Simulate disk-replacement ++TEST kill_brick $V0 $H0 $B0/${V0}-1 ++EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1 ++TEST rm -rf $B0/${V0}-1/one ++TEST rm -rf $B0/${V0}-1/.glusterfs ++ ++#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI ++#which will create .glusterfs folder. 
++mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs ++ ++# Start force ++TEST $CLI volume start $V0 force ++ ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ ++TEST stat $M0/one ++ ++sleep 1 ++ ++# Check pending xattrs ++EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data ++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry ++EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata ++ ++TEST gluster volume set $V0 self-heal-daemon on ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two ++EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three ++ ++cleanup; +diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +new file mode 100644 +index 0000000..aff001c +--- /dev/null ++++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t +@@ -0,0 +1,79 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST $CLI volume set $V0 self-heal-daemon off ++ ++# Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++ ++#------------------------------------------------------------------------------# ++TEST touch $M0/f1 ++gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1) ++gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1) ++ ++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a ++# brick crash at the point where file got created but no xattrs were set. ++TEST setfattr -x trusted.gfid $B0/${V0}1/f1 ++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 ++ ++# storage/posix considers that a file without gfid changed less than a second ++# before doesn't exist, so we need to wait for a second to force posix to ++# consider that this is a valid file but without gfid. ++sleep 2 ++ ++# Assume there were no pending xattrs on parent dir due to 1st brick crashing ++# too. Then name heal from client must heal the gfid. 
++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST stat $M0/f1 ++EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1 ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1 ++ ++#------------------------------------------------------------------------------# ++TEST mkdir $M0/dir ++TEST touch $M0/dir/f2 ++gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2) ++gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2) ++ ++# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a ++# brick crash at the point where file got created but no xattrs were set. ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 ++TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 ++ ++#Now simulate setting of pending entry xattr on parent dir of 1st brick. ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir ++create_brick_xattrop_entry $B0/${V0}0 dir ++ ++# storage/posix considers that a file without gfid changed less than a second ++# before doesn't exist, so we need to wait for a second to force posix to ++# consider that this is a valid file but without gfid. ++sleep 2 ++ ++#Trigger entry-heal via shd ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2 ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 ++ ++#------------------------------------------------------------------------------# ++cleanup; +diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +new file mode 100644 +index 0000000..9079c93 +--- /dev/null ++++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t +@@ -0,0 +1,117 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++ ++########################################################################################## ++# GFID link file and the GFID is missing on one brick and all the bricks are being blamed. ++ ++TEST touch $M0/dir/file ++TEST `echo append>> $M0/dir/file` ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on one brick. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++rm -f $M0/dir/file ++ ++ ++########################################################################################### ++# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed. ++ ++TEST $CLI volume heal $V0 disable ++TEST touch $M0/dir/file ++#TEST kill_brick $V0 $H0 $B0/$V0"1" ++ ++#B0 and B2 must blame B1 ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir to trigger index heal. ++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Remove the gfid xattr and the link file on two bricks. ++gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file) ++gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file) ++TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file ++TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file ++TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++ ++# Wait for 2 second to force posix to consider that this is a valid file but ++# without gfid. ++sleep 2 ++TEST $CLI volume heal $V0 ++ ++# Heal should not fail as the file is missing gfid xattr and the link file, ++# which is not actually the gfid or type mismatch. 
++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file ++TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file ++TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file ++ ++cleanup +diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +new file mode 100644 +index 0000000..4f27da4 +--- /dev/null ++++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t +@@ -0,0 +1,90 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup ++ ++function check_gfid_and_link_count ++{ ++ local file=$1 ++ ++ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) ++ TEST [ ! -z $file_gfid_b0 ] ++ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) ++ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) ++ EXPECT $file_gfid_b0 echo $file_gfid_b1 ++ EXPECT $file_gfid_b0 echo $file_gfid_b2 ++ ++ EXPECT "2" stat -c %h $B0/${V0}0/$file ++ EXPECT "2" stat -c %h $B0/${V0}1/$file ++ EXPECT "2" stat -c %h $B0/${V0}2/$file ++} ++TESTS_EXPECTED_IN_LOOP=18 ++ ++################################################################################ ++## Start and create a volume ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0; ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume heal $V0 disable ++EXPECT 'Started' volinfo_field $V0 'Status'; ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST mkdir $M0/dir ++TEST `echo "File 1 " > $M0/dir/file1` ++TEST touch $M0/dir/file{2..4} ++ ++# Remove file2 from 1st & 3rd bricks ++TEST rm -f $B0/$V0"0"/dir/file2 ++TEST rm -f $B0/$V0"2"/dir/file2 ++ ++# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks ++gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) ++gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) ++TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 ++TEST rm -f $B0/$V0"0"/dir/file3 ++TEST rm -f $B0/$V0"1"/dir/file3 ++ ++# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick ++gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) ++gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) ++TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 ++TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 ++ ++# B0 and B2 blame each other ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++ ++# Add entry to xattrop dir on first brick. 
++xattrop_dir0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_dir0` ++gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) ++TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ ++EXPECT "^1$" get_pending_heal_count $V0 ++ ++# Launch heal ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# All the files must be present on all the bricks after conservative merge and ++# should have the gfid xattr and the .glusterfs hardlink. ++check_gfid_and_link_count dir/file1 ++check_gfid_and_link_count dir/file2 ++check_gfid_and_link_count dir/file3 ++check_gfid_and_link_count dir/file4 ++ ++cleanup +diff --git a/tests/basic/afr/replace-brick-self-heal-non-granular.t b/tests/basic/afr/replace-brick-self-heal-non-granular.t +new file mode 100644 +index 0000000..c86bff1 +--- /dev/null ++++ b/tests/basic/afr/replace-brick-self-heal-non-granular.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume set $V0 cluster.granular-entry-heal off ++TEST $CLI volume start $V0 ++TEST $CLI volume set $V0 cluster.data-self-heal off ++TEST $CLI volume set $V0 cluster.metadata-self-heal off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume set $V0 cluster.heal-timeout 5 ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++# Create files ++for i in {1..5} ++do ++ echo $i > $M0/file$i.txt ++done ++ ++# Metadata changes ++TEST setfattr -n user.test -v qwerty $M0/file5.txt ++ ++# Replace brick1 ++TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force ++ ++# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss) ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ ++ ++# Check if pending xattr and dirty-xattr are set for replaced-brick ++EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new ++ ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++ ++TEST $CLI volume set $V0 self-heal-daemon on ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++ ++# Wait for heal to complete ++EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 ++ ++# Check if entry-heal has happened ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort) ++ ++# To make sure that files were not lost from brick0 ++TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort) ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++ ++# Test if data was healed ++TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt ++# To make sure that data was not lost from brick0 ++TEST diff $B0/${V0}0/file1.txt 
$B0/${V0}1/file1.txt ++ ++# Test if metadata was healed and exists on both the bricks ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt ++EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt ++ ++cleanup; +diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t +index 0360db7..da31c87 100644 +--- a/tests/basic/afr/replace-brick-self-heal.t ++++ b/tests/basic/afr/replace-brick-self-heal.t +@@ -30,7 +30,7 @@ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit forc + TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/ + + # Check if pending xattr and dirty-xattr are set for replaced-brick +-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 ++EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 + EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new + + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t +index 0f57d66..e23eb26 100644 +--- a/tests/bugs/replicate/bug-1130892.t ++++ b/tests/bugs/replicate/bug-1130892.t +@@ -56,7 +56,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + TEST stat $M0/one + + # Check pending xattrs +-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data ++EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data + EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry + EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata + +diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t +index 125c35a..9714d5e 100644 +--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t ++++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t +@@ -49,7 +49,7 @@ TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2 + TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2 + + #Now simulate setting of pending entry xattr on parent dir of 1st brick. +-TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir ++TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir + create_brick_xattrop_entry $B0/${V0}0 dir + + #Trigger entry-heal via shd +diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +index 0aeaaaf..1fdf7ea 100644 +--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t ++++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t +@@ -23,19 +23,21 @@ TEST mkdir $M0/dir + ########################################################################################## + # GFID link file and the GFID is missing on one brick and all the bricks are being blamed. + +-TEST touch $M0/dir/file +-#TEST kill_brick $V0 $H0 $B0/$V0"1" ++TEST `echo append>> $M0/dir/file` + + #B0 and B2 must blame B1 +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. 
++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir to trigger index heal. + xattrop_dir0=$(afr_get_index_path $B0/$V0"0") + base_entry_b0=`ls $xattrop_dir0` + gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) +-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str + EXPECT "^1$" get_pending_heal_count $V0 + + # Remove the gfid xattr and the link file on one brick. +@@ -70,18 +72,20 @@ rm -f $M0/dir/file + + TEST $CLI volume heal $V0 disable + TEST touch $M0/dir/file +-#TEST kill_brick $V0 $H0 $B0/$V0"1" + + #B0 and B2 must blame B1 +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir to trigger index heal. + xattrop_dir0=$(afr_get_index_path $B0/$V0"0") + base_entry_b0=`ls $xattrop_dir0` + gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) +-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str ++ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str + EXPECT "^1$" get_pending_heal_count $V0 + + # Remove the gfid xattr and the link file on two bricks. +diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +index 9627908..3da873a 100644 +--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t ++++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t +@@ -59,8 +59,11 @@ TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_ + TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 + + # B0 and B2 blame each other +-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir +-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir ++# Set data part of the xattr also to 1 so that local->need_full_crawl is true. ++# Another way is to create the needed entries inside indices/entry-changes ++# folder. ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir ++setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir + + # Add entry to xattrop dir on first brick. 
+ xattrop_dir0=$(afr_get_index_path $B0/$V0"0") +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 1608f75..36fd3a9 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2549,6 +2549,11 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid) + } + } + ++ gf_msg_debug( ++ this->name, 0, ++ "heals needed for %s: [entry-heal=%d, metadata-heal=%d, data-heal=%d]", ++ uuid_utoa(gfid), entry_selfheal, metadata_selfheal, data_selfheal); ++ + if (data_selfheal && priv->data_self_heal) + data_ret = afr_selfheal_data(frame, this, fd); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c +index cdff4a5..b97c66b 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-data.c ++++ b/xlators/cluster/afr/src/afr-self-heal-data.c +@@ -239,6 +239,9 @@ afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd, + sink_count = AFR_COUNT(healed_sinks, priv->child_count); + data_lock = alloca0(priv->child_count); + ++ gf_msg_debug(this->name, 0, "gfid:%s, offset=%jd, size=%zu", ++ uuid_utoa(fd->inode->gfid), offset, size); ++ + ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size, + data_lock); + { +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 40be898..00b5b2d 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -206,8 +206,11 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + replies); + } else { + if (!gf_uuid_compare(replies[i].poststat.ia_gfid, +- replies[source].poststat.ia_gfid)) ++ replies[source].poststat.ia_gfid)) { ++ gf_msg_debug(this->name, 0, "skipping %s, no heal needed.", ++ name); + continue; ++ } + + ret = afr_selfheal_recreate_entry(frame, i, source, sources, + fd->inode, name, inode, replies); +@@ -839,7 +842,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + + out: + loc_wipe(&loc); +- return 0; ++ return ret; + } + + static int +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index a72c494..bd17a82 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13181,6 +13181,19 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + goto out; + } + } ++ ++ if ((conf->op_version >= GD_OP_VERSION_7_1) && ++ (volinfo->status == GLUSTERD_STATUS_NONE)) { ++ ret = dict_set_dynstr_with_alloc(volinfo->dict, ++ "cluster.granular-entry-heal", "on"); ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED, ++ "Failed to set option 'cluster.granular-entry-heal' " ++ "on volume %s", ++ volinfo->volname); ++ goto out; ++ } ++ } + out: + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch b/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch new file mode 100644 index 0000000..dde2156 --- /dev/null +++ b/SOURCES/0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch @@ -0,0 +1,141 @@ +From 2d172144810956225eac3599c943416c4a7e25d0 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Tue, 8 Dec 2020 20:30:23 +0530 +Subject: [PATCH 489/511] glusterd: fix bug in enabling granular-entry-heal + +Upstream patch details: 
+/------------------------------------------------------------------------------/ +commit f5e1eb87d4af44be3b317b7f99ab88f89c2f0b1a meant to enable the +volume option only for replica volumes but inadvertently enabled +it for all volume types. Fixing it now. + +Also found a bug in glusterd where disabling the option on plain +distribute was succeeding even though setting it in the fist place +fails. Fixed that too. + +>Fixes: #1483 +>Change-Id: Icb6c169a8eec44cc4fb4dd636405d3b3485e91b4 +>Reported-by: Sheetal Pamecha <spamecha@redhat.com> +>Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Upstream Patch: https://github.com/gluster/glusterfs/pull/1752 +/------------------------------------------------------------------------------/ + +BUG: 1890506 +Change-Id: Id63655dac08d2cfda4899d7ee0efe96e72cd6986 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220556 +Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/afr/granular-esh/cli.t | 30 ++++++++++++++++++++----- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 12 +++++----- + 3 files changed, 34 insertions(+), 11 deletions(-) + +diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t +index 995d93e..5ab2e39 100644 +--- a/tests/basic/afr/granular-esh/cli.t ++++ b/tests/basic/afr/granular-esh/cli.t +@@ -11,25 +11,38 @@ TESTS_EXPECTED_IN_LOOP=4 + TEST glusterd + TEST pidof glusterd + +-TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +-# Test that enabling the option should work on a newly created volume +-TEST $CLI volume set $V0 cluster.granular-entry-heal on +-TEST $CLI volume set $V0 cluster.granular-entry-heal off +- + ######################### + ##### DISPERSE TEST ##### + ######################### + # Execute the same command on a disperse volume and make sure it fails. + TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume start $V1 ++TEST ! $CLI volume heal $V1 granular-entry-heal enable ++TEST ! $CLI volume heal $V1 granular-entry-heal disable ++ ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 ++ ++######################### ++##### PLAIN DISTRIBUTE TEST ##### ++######################### ++# Execute the same command on a distribute volume and make sure it fails. ++TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1,2} ++EXPECT "no" volume_get_field $V1 cluster.granular-entry-heal + TEST $CLI volume start $V1 + TEST ! $CLI volume heal $V1 granular-entry-heal enable + TEST ! $CLI volume heal $V1 granular-entry-heal disable ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 + + ####################### + ###### TIER TEST ###### + ####################### + # Execute the same command on a disperse + replicate tiered volume and make + # sure the option is set on the replicate leg of the volume ++TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2} ++TEST $CLI volume start $V1 + TEST $CLI volume tier $V1 attach replica 2 $H0:$B0/${V1}{3,4} + TEST $CLI volume heal $V1 granular-entry-heal enable + EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal +@@ -52,10 +65,17 @@ TEST kill_brick $V1 $H0 $B0/${V1}3 + # failed. + TEST ! 
$CLI volume heal $V1 granular-entry-heal enable +EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal ++TEST $CLI volume stop $V1 ++TEST $CLI volume delete $V1 + + ###################### + ### REPLICATE TEST ### + ###################### ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} ++EXPECT "on" volume_get_field $V0 cluster.granular-entry-heal ++# Test that enabling the option should work on a newly created volume ++TEST $CLI volume set $V0 cluster.granular-entry-heal on ++TEST $CLI volume set $V0 cluster.granular-entry-heal off + TEST $CLI volume start $V0 + TEST $CLI volume set $V0 cluster.data-self-heal off + TEST $CLI volume set $V0 cluster.metadata-self-heal off +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bd17a82..ad3750e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -13183,7 +13183,8 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option) + } + + if ((conf->op_version >= GD_OP_VERSION_7_1) && +- (volinfo->status == GLUSTERD_STATUS_NONE)) { ++ (volinfo->status == GLUSTERD_STATUS_NONE) && ++ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE)) { + ret = dict_set_dynstr_with_alloc(volinfo->dict, + "cluster.granular-entry-heal", "on"); + if (ret) { +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +index 134b04c..09e6ead 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +@@ -621,11 +621,13 @@ glusterd_handle_heal_options_enable_disable(rpcsvc_request_t *req, dict_t *dict, + goto out; + } + +- if (((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || +- (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) && +- (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)) { +- ret = -1; +- goto out; ++ if ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || ++ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE)) { ++ if ((volinfo->type != GF_CLUSTER_TYPE_REPLICATE) && ++ (volinfo->type != GF_CLUSTER_TYPE_TIER)) { ++ ret = -1; ++ goto out; ++ } + } + + if ((heal_op == GF_SHD_OP_HEAL_ENABLE) || +-- +1.8.3.1 + diff --git a/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch b/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch new file mode 100644 index 0000000..bd3c777 --- /dev/null +++ b/SOURCES/0490-Segmentation-fault-occurs-during-truncate.patch @@ -0,0 +1,57 @@ +From 5a110946b41619577b365cdceddc4da551ff49f0 Mon Sep 17 00:00:00 2001 +From: kinsu <vpolakis@gmail.com> +Date: Thu, 19 Sep 2019 08:34:32 +0000 +Subject: [PATCH 490/511] Segmentation fault occurs during truncate + +Problem: +Segmentation fault occurs when bricks are nearly 100% full and in +parallel truncate of a file is attempted (No space left on device). +A prerequisite is that performance xlators are activated +(read-ahead, write-behind, etc.). +During stack unwind of the frames following an error response +from the brick (No space left on device), frame->local includes a memory +location that is not allocated via mem_get but via calloc. +The destroyed frame is always ra_truncate_cbk winded from ra_ftruncate +and the inode ptr is copied to the frame local in the wb_ftruncate.
+ +Fix: +an extra NULL check is added for the hdr->pool_list ptr + +>Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1 +>Fixes: bz#1797882 +>Signed-off-by: kinsu <vpolakis@gmail.com> + +Upstream-patch: https://review.gluster.org/c/glusterfs/+/23445 + +BUG: 1842449 +Change-Id: Ic5d3bd0ab7011e40b2811c6dece063b256e4d9d1 +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220540 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/mem-pool.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index 73503e0..1390747 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -857,6 +857,14 @@ mem_put(void *ptr) + /* Not one of ours; don't touch it. */ + return; + } ++ ++ if (!hdr->pool_list) { ++ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL, ++ LG_MSG_INVALID_ARG, ++ "invalid argument hdr->pool_list NULL"); ++ return; ++ } ++ + pool_list = hdr->pool_list; + pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST]; + +-- +1.8.3.1 + diff --git a/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch b/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch new file mode 100644 index 0000000..375cfd2 --- /dev/null +++ b/SOURCES/0491-glusterd-mount-directory-getting-truncated-on-mounti.patch @@ -0,0 +1,56 @@ +From 0fed8ca9c6c9e3a9041951bc748c7936d0abc8cf Mon Sep 17 00:00:00 2001 +From: nik-redhat <nladha@redhat.com> +Date: Tue, 15 Sep 2020 16:20:19 +0530 +Subject: [PATCH 491/511] glusterd: mount directory getting truncated on + mounting shared_storage + +Issue: +In case of a user-created volume the mount point +is the brick path (ex: /data/brick), but in case of +shared_storage the mount point is '/'. So, here +we increment the pointer by one so as to get the exact +path of the brick without '/'. This works fine for other +volumes, since the brick_dir pointer is then +at '/', but for shared_storage it is already at 'v' (the +starting letter of the 'var' directory). So, after the +increment, the shared_storage path wrongly starts from +'ar/lib/glusterd/...' + +Fix: +Only increment the pointer if the current position is '/'; +otherwise the path will be wrong.
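To make the off-by-one above concrete, here is a minimal bash sketch of the same path arithmetic as glusterd_get_brick_mount_dir(); the shared_storage brick path and the variable names are illustrative assumptions that only mirror the C code:

# Regular volume: the brick lives under a dedicated mount point.
mnt_pt="/data"; brickpath="/data/brick"
brick_dir="${brickpath:${#mnt_pt}}"   # "/brick" -- starts with '/'
echo "/${brick_dir:1}"                # old unconditional skip: "/brick", correct

# shared_storage: the mount point is '/' itself.
mnt_pt="/"; brickpath="/var/lib/glusterd/ss_brick"
brick_dir="${brickpath:${#mnt_pt}}"   # "var/lib/glusterd/ss_brick" -- no leading '/'
echo "/${brick_dir:1}"                # old unconditional skip: "/ar/lib/glusterd/ss_brick", truncated
# The fix only skips a character when it really is '/':
[ "${brick_dir:0:1}" = "/" ] && brick_dir="${brick_dir:1}"
echo "/$brick_dir"                    # "/var/lib/glusterd/ss_brick"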
+ +>Fixes: #1480 + +>Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357 +>Signed-off-by: nik-redhat <nladha@redhat.com> + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24989 + +BUG: 1878077 +Change-Id: Id31bb13f58134ae2099884fbc5984c4e055fb357 +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220536 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index ad3750e..b343eee 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -1221,7 +1221,8 @@ glusterd_get_brick_mount_dir(char *brickpath, char *hostname, char *mount_dir) + } + + brick_dir = &brickpath[strlen(mnt_pt)]; +- brick_dir++; ++ if (brick_dir[0] == '/') ++ brick_dir++; + + snprintf(mount_dir, VALID_GLUSTERD_PATHMAX, "/%s", brick_dir); + } +-- +1.8.3.1 + diff --git a/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch b/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch new file mode 100644 index 0000000..a983baa --- /dev/null +++ b/SOURCES/0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch @@ -0,0 +1,188 @@ +From bde1ad97f8739f8370a2bbb92229b1b397ecd82c Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Tue, 8 Dec 2020 19:06:03 +0530 +Subject: [PATCH 492/511] afr/lookup: Pass xattr_req in while doing a selfheal + in lookup + +We were not passing xattr_req when doing a name self heal +as well as a meta data heal. Because of this, some xdata +was missing which causes i/o errors + +Upstream patch details: +> Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f +> Fixes: bz#1728770 +> Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com> +Upstream Patch : https://review.gluster.org/#/c/glusterfs/+/23024/ + +BUG: 1726673 +Change-Id: Ibfb1205a7eb0195632dc3820116ffbbb8043545f +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220538 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/replicate/bug-1728770-pass-xattrs.t | 52 ++++++++++++++++++++++++++ + tests/include.rc | 1 + + xlators/cluster/afr/src/afr-common.c | 8 +++- + xlators/cluster/afr/src/afr-self-heal-common.c | 9 ++++- + xlators/cluster/afr/src/afr-self-heal.h | 2 +- + 5 files changed, 67 insertions(+), 5 deletions(-) + create mode 100644 tests/bugs/replicate/bug-1728770-pass-xattrs.t + +diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t +new file mode 100644 +index 0000000..159c4fc +--- /dev/null ++++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t +@@ -0,0 +1,52 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../snapshot.rc ++ ++cleanup; ++ ++function fop_on_bad_disk { ++ local path=$1 ++ mkdir $path/dir{1..1000} 2>/dev/null ++ mv $path/dir1 $path/newdir ++ touch $path/foo.txt ++ echo $? ++} ++ ++function ls_fop_on_bad_disk { ++ local path=$1 ++ ls $path ++ echo $? 
++} ++ ++TEST init_n_bricks 6; ++TEST setup_lvm 6; ++ ++TEST glusterd; ++TEST pidof glusterd; ++ ++TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6; ++TEST $CLI volume set $V0 health-check-interval 1000; ++ ++TEST $CLI volume start $V0; ++ ++TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0; ++#corrupt last disk ++dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync ++ ++ ++# Test the disk is now returning EIO for touch and ls ++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6" ++EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6" ++ ++TEST touch $M0/foo{1..100} ++TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start ++EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6"; ++ ++#check that remove-brick status should not have any failed or skipped files ++var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed` ++TEST [ `echo $var | awk '{print $5}'` = "0" ] ++TEST [ `echo $var | awk '{print $6}'` = "0" ] ++ ++cleanup; +diff --git a/tests/include.rc b/tests/include.rc +index 762c5e2..c925941 100644 +--- a/tests/include.rc ++++ b/tests/include.rc +@@ -89,6 +89,7 @@ GRAPH_SWITCH_TIMEOUT=10 + UNLINK_TIMEOUT=5 + MDC_TIMEOUT=5 + IO_WAIT_TIMEOUT=5 ++DISK_FAIL_TIMEOUT=80 + + LOGDIR=$(gluster --print-logdir) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 851ccad..fca2cd5 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2609,6 +2609,10 @@ afr_lookup_sh_metadata_wrap(void *opaque) + dict = dict_new(); + if (!dict) + goto out; ++ if (local->xattr_req) { ++ dict_copy(local->xattr_req, dict); ++ } ++ + ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to set link-count in dict "); +@@ -2617,7 +2621,7 @@ afr_lookup_sh_metadata_wrap(void *opaque) + if (loc_is_nameless(&local->loc)) { + ret = afr_selfheal_unlocked_discover_on(frame, local->inode, + local->loc.gfid, local->replies, +- local->child_up); ++ local->child_up, dict); + } else { + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, +@@ -2791,7 +2795,7 @@ afr_lookup_selfheal_wrap(void *opaque) + + inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent, + local->loc.name, local->replies, +- local->child_up, NULL); ++ local->child_up, local->xattr_req); + if (inode) + inode_unref(inode); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 36fd3a9..9b6575f 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -1861,7 +1861,7 @@ afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict) + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +- unsigned char *discover_on) ++ unsigned char *discover_on, dict_t *dict) + { + loc_t loc = { + 0, +@@ -1876,6 +1876,8 @@ afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + xattr_req = dict_new(); + if (!xattr_req) + return -ENOMEM; ++ if (dict) ++ dict_copy(dict, xattr_req); + + if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) { + dict_unref(xattr_req); +@@ -1906,11 +1908,14 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, 
inode_t *inode, uuid_t gfid, + struct afr_reply *replies) + { + afr_local_t *local = NULL; ++ dict_t *dict = NULL; + + local = frame->local; ++ if (local && local->xattr_req) ++ dict = local->xattr_req; + + return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies, +- local->child_up); ++ local->child_up, dict); + } + + unsigned int +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index b39af02..8f6fb00 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -188,7 +188,7 @@ afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid, + int + afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode, + uuid_t gfid, struct afr_reply *replies, +- unsigned char *discover_on); ++ unsigned char *discover_on, dict_t *dict); + inode_t * + afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent, + const char *name, struct afr_reply *replies, +-- +1.8.3.1 + diff --git a/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch b/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch new file mode 100644 index 0000000..e712886 --- /dev/null +++ b/SOURCES/0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch @@ -0,0 +1,283 @@ +From 03de45e5fb1c8aa5369848ed9e52abd1365e1d21 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Wed, 31 Jul 2019 11:34:19 +0530 +Subject: [PATCH 493/511] geo-rep: Note section is required for ignore_deletes + +There exists a window of 15 sec, where the deletes are picked up +by history crawl when the ignore_deletes is set to true. +And it eventually deletes the file/s from slave which is/are not +supposed to be deleted. Though it is working as per design, a +note regarding this is needed. + +Added a warning message indicating the same. +Also logged info when the worker restarts after ignore-deletes +option set. 
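As a usage sketch of the confirmation added by this patch (volume and host names below are placeholders), the CLI interaction now looks roughly like this:

gluster volume geo-replication mastervol slavehost::slavevol config ignore-deletes true
# There exists ~15 seconds delay for the option to take effect from stime
# of the corresponding brick. Please check the log for the time, the
# option is effective. Proceed (y/n)
#
# Answering 'n' aborts with "Aborted by user." and leaves ignore-deletes at
# its default of "false"; answering 'y' applies it, and the worker then logs
# the option together with the stime when its next history crawl starts.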
+ +>fixes: bz#1708603 +>Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1 +>Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> +Upstream patch: https://review.gluster.org/c/glusterfs/+/22702 + +BUG: 1224906 +Change-Id: I103be882fac18b4cef935efa355f5037a396f7c1 +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220757 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-parser.c | 45 ++++++++++++++++++++------ + cli/src/cli-cmd-volume.c | 20 ++++++++---- + cli/src/cli.h | 3 +- + geo-replication/syncdaemon/gsyncd.py | 2 +- + geo-replication/syncdaemon/master.py | 6 ++++ + tests/00-geo-rep/bug-1708603.t | 63 ++++++++++++++++++++++++++++++++++++ + 6 files changed, 120 insertions(+), 19 deletions(-) + create mode 100644 tests/00-geo-rep/bug-1708603.t + +diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c +index 5fd05f4..34f17c9 100644 +--- a/cli/src/cli-cmd-parser.c ++++ b/cli/src/cli-cmd-parser.c +@@ -2901,7 +2901,8 @@ out: + } + + int32_t +-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) ++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **options, char **errstr) + { + int32_t ret = -1; + dict_t *dict = NULL; +@@ -2918,6 +2919,8 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + char *save_ptr = NULL; + char *slave_temp = NULL; + char *token = NULL; ++ gf_answer_t answer = GF_ANSWER_NO; ++ const char *question = NULL; + + GF_ASSERT(words); + GF_ASSERT(options); +@@ -2990,8 +2993,10 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + + if (masteri && gsyncd_url_check(words[masteri])) + goto out; +- if (slavei && !glob && !gsyncd_url_check(words[slavei])) ++ if (slavei && !glob && !gsyncd_url_check(words[slavei])) { ++ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]); + goto out; ++ } + + w = str_getunamb(words[cmdi], opwords); + if (!w) +@@ -3101,16 +3106,36 @@ cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **options) + } + if (!ret) + ret = dict_set_int32(dict, "type", type); +- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) ++ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) { ++ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") && ++ !strcmp((char *)words[wordcount - 1], "true")) { ++ question = ++ "There exists ~15 seconds delay for the option to take" ++ " effect from stime of the corresponding brick. Please" ++ " check the log for the time, the option is effective." 
++ " Proceed"; ++ ++ answer = cli_cmd_get_confirmation(state, question); ++ ++ if (GF_ANSWER_NO == answer) { ++ gf_log("cli", GF_LOG_INFO, ++ "Operation " ++ "cancelled, exiting"); ++ *errstr = gf_strdup("Aborted by user."); ++ ret = -1; ++ goto out; ++ } ++ } ++ + ret = config_parse(words, wordcount, dict, cmdi, glob); ++ } + + out: + if (slave_temp) + GF_FREE(slave_temp); +- if (ret) { +- if (dict) +- dict_unref(dict); +- } else ++ if (ret && dict) ++ dict_unref(dict); ++ else + *options = dict; + + return ret; +@@ -5659,9 +5684,9 @@ cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options) + int32_t ret = -1; + char *w = NULL; + char *volname = NULL; +- char *opwords[] = { +- "enable", "disable", "scrub-throttle", "scrub-frequency", "scrub", +- "signing-time", "signer-threads", NULL}; ++ char *opwords[] = {"enable", "disable", "scrub-throttle", ++ "scrub-frequency", "scrub", "signing-time", ++ "signer-threads", NULL}; + char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL}; + char *scrub_freq_values[] = {"hourly", "daily", "weekly", "biweekly", + "monthly", "minute", NULL}; +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 72504ca..6f5bf8b 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -2457,6 +2457,7 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word, + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + cli_local_t *local = NULL; ++ char *errstr = NULL; + #if (USE_EVENTS) + int ret1 = -1; + int cmd_type = -1; +@@ -2468,16 +2469,21 @@ cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word, + + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET]; + +- frame = create_frame(THIS, THIS->ctx->pool); +- if (frame == NULL) { +- ret = -1; ++ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr); ++ if (ret) { ++ if (errstr) { ++ cli_err("%s", errstr); ++ GF_FREE(errstr); ++ } else { ++ cli_usage_out(word->pattern); ++ } ++ parse_err = 1; + goto out; + } + +- ret = cli_cmd_gsync_set_parse(words, wordcount, &options); +- if (ret) { +- cli_usage_out(word->pattern); +- parse_err = 1; ++ frame = create_frame(THIS, THIS->ctx->pool); ++ if (frame == NULL) { ++ ret = -1; + goto out; + } + +diff --git a/cli/src/cli.h b/cli/src/cli.h +index c30ae9c..7b4f446 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -269,7 +269,8 @@ int32_t + cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt); + + int32_t +-cli_cmd_gsync_set_parse(const char **words, int wordcount, dict_t **opt); ++cli_cmd_gsync_set_parse(struct cli_state *state, const char **words, ++ int wordcount, dict_t **opt, char **errstr); + + int32_t + cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt); +diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py +index 8940384..215c62d 100644 +--- a/geo-replication/syncdaemon/gsyncd.py ++++ b/geo-replication/syncdaemon/gsyncd.py +@@ -315,7 +315,7 @@ def main(): + + # Log message for loaded config file + if config_file is not None: +- logging.info(lf("Using session config file", path=config_file)) ++ logging.debug(lf("Using session config file", path=config_file)) + + set_term_handler() + excont = FreeObject(exval=0) +diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py +index 08e98f8..98637e7 100644 +--- a/geo-replication/syncdaemon/master.py ++++ b/geo-replication/syncdaemon/master.py +@@ -1549,6 +1549,12 @@ class 
GMasterChangeloghistoryMixin(GMasterChangelogMixin): + data_stime = self.get_data_stime() + + end_time = int(time.time()) ++ ++ #as start of historical crawl marks Geo-rep worker restart ++ if gconf.get("ignore-deletes"): ++ logging.info(lf('ignore-deletes config option is set', ++ stime=data_stime)) ++ + logging.info(lf('starting history crawl', + turns=self.history_turns, + stime=data_stime, +diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t +new file mode 100644 +index 0000000..26913f1 +--- /dev/null ++++ b/tests/00-geo-rep/bug-1708603.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. $(dirname $0)/../volume.rc ++. $(dirname $0)/../geo-rep.rc ++. $(dirname $0)/../env.rc ++ ++SCRIPT_TIMEOUT=300 ++ ++##Cleanup and start glusterd ++cleanup; ++TEST glusterd; ++TEST pidof glusterd ++ ++ ++##Variables ++GEOREP_CLI="gluster volume geo-replication" ++master=$GMV0 ++SH0="127.0.0.1" ++slave=${SH0}::${GSV0} ++num_active=2 ++num_passive=2 ++master_mnt=$M0 ++slave_mnt=$M1 ++ ++############################################################ ++#SETUP VOLUMES AND GEO-REPLICATION ++############################################################ ++ ++##create_and_start_master_volume ++TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4}; ++TEST $CLI volume start $GMV0 ++ ++##create_and_start_slave_volume ++TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4}; ++TEST $CLI volume start $GSV0 ++ ++##Mount master ++TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 ++ ++##Mount slave ++TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 ++ ++#Create geo-rep session ++TEST create_georep_session $master $slave ++ ++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1 ++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++echo y | $GEOREP_CLI $master $slave config ignore-deletes true ++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes) ++ ++#Stop Geo-rep ++TEST $GEOREP_CLI $master $slave stop ++ ++#Delete Geo-rep ++TEST $GEOREP_CLI $master $slave delete ++ ++#Cleanup authorized keys ++sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys ++sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys ++ ++cleanup; ++#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 +-- +1.8.3.1 + diff --git a/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch b/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch new file mode 100644 index 0000000..d11b138 --- /dev/null +++ b/SOURCES/0494-glusterd-start-the-brick-on-a-different-port.patch @@ -0,0 +1,54 @@ +From 1b24bc4319203128a9ff7f97fe14f4b3622c4eec Mon Sep 17 00:00:00 2001 +From: Sanju Rakonde <srakonde@redhat.com> +Date: Wed, 26 Aug 2020 20:05:35 +0530 +Subject: [PATCH 494/511] glusterd: start the brick on a different port + +Problem: brick fails to start when the port provided by +glusterd is in use by any other process + +Solution: glusterd should check errno set by runner_run() +and if it is set to EADDRINUSE, it should allocate a new +port to the brick and try to start it again. + +Previously ret value is checked instead of errno, so the +retry part never executed. Now, we initialize errno to 0 +before calling runner framework. and afterwards store the +errno into ret to avoid modification of errno in subsequent +function calls. 
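A quick reproduction sketch in the style of the .t tests above; it assumes the default brick base-port of 49152 and an nc variant that accepts "nc -l <port>":

# Squat on the first port glusterd would hand out to the brick.
nc -l 49152 &
NC_PID=$!
# glusterfsd's bind() now fails with EADDRINUSE; with this fix glusterd
# sees the errno from runner_run() and retries the brick on a new port.
TEST $CLI volume start $V0
EXPECT 'Started' volinfo_field $V0 'Status'
kill $NC_PID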
+ +>fixes: #1101 + +>Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a +>Signed-off-by: Sanju Rakonde <srakonde@redhat.com> +>Signed-off-by: nik-redhat <nladha@redhat.com> + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24923/ + +BUG: 1865796 +Change-Id: I1aa048a77c5f8b035dece36976d60602d9753b1a +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220541 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index b343eee..f7030fb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -2289,7 +2289,10 @@ retry: + + if (wait) { + synclock_unlock(&priv->big_lock); ++ errno = 0; + ret = runner_run(&runner); ++ if (errno != 0) ++ ret = errno; + synclock_lock(&priv->big_lock); + + if (ret == EADDRINUSE) { +-- +1.8.3.1 + diff --git a/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch b/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch new file mode 100644 index 0000000..6b3f6f5 --- /dev/null +++ b/SOURCES/0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch @@ -0,0 +1,60 @@ +From 17a2a880290d2038c913c23985df620e3c9741b3 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Mon, 16 Mar 2020 15:17:23 +0000 +Subject: [PATCH 495/511] geo-rep: descriptive message when worker crashes due + to EIO + +With this patch now you can notice log if it is due to EIO: + +[2020-03-16 16:24:48.293837] E [syncdutils(worker /bricks/brick1/mbr3):348:log_raise_exception] <top>: Getting "Input/Output error" is most likely due to a. Brick is down or b. Split brain issue. +[2020-03-16 16:24:48.293915] E [syncdutils(worker /bricks/brick1/mbr3):352:log_raise_exception] <top>: This is expected as per design to keep the consistency of the file system. Once the above issue is resolved geo-rep would automatically proceed further. 
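To verify the friendlier logging, an operator can grep the worker log once a brick goes down or a split-brain triggers EIO; the log location below assumes a default installation layout:

grep -E "Input/Output error|expected as per design" \
    /var/log/glusterfs/geo-replication/*/gsyncd.log
# Both E-level lines quoted in the commit message above should appear together.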
+ +>Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca +>Fixes: #1104 +>Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +>Upstream Patch : https://review.gluster.org/c/glusterfs/+/24228/ + +BUG: 1412494 +Change-Id: Ie33f2440bc96089731ce12afa8dab91d9550a7ca +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220874 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/syncdutils.py | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index f43e13b..d5a94d4 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -22,7 +22,7 @@ import socket + from subprocess import PIPE + from threading import Lock, Thread as baseThread + from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED +-from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode ++from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO + from signal import signal, SIGTERM + import select as oselect + from os import waitpid as owaitpid +@@ -346,6 +346,17 @@ def log_raise_exception(excont): + ECONNABORTED): + logging.error(lf('Gluster Mount process exited', + error=errorcode[exc.errno])) ++ elif isinstance(exc, OSError) and exc.errno == EIO: ++ logging.error("Getting \"Input/Output error\" " ++ "is most likely due to " ++ "a. Brick is down or " ++ "b. Split brain issue.") ++ logging.error("This is expected as per design to " ++ "keep the consistency of the file system. " ++ "Once the above issue is resolved " ++ "geo-replication would automatically " ++ "proceed further.") ++ logtag = "FAIL" + else: + logtag = "FAIL" + if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG): +-- +1.8.3.1 + diff --git a/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch b/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch new file mode 100644 index 0000000..590aea3 --- /dev/null +++ b/SOURCES/0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch @@ -0,0 +1,139 @@ +From 5893e64ca8c147b7acfa12cd9824f254d53ee261 Mon Sep 17 00:00:00 2001 +From: mohit84 <moagrawa@redhat.com> +Date: Wed, 4 Nov 2020 09:02:03 +0530 +Subject: [PATCH 496/511] posix: Use MALLOC instead of alloca to allocate + memory for xattrs list (#1730) + +In case of file is having huge xattrs on backend a brick process is +crashed while alloca(size) limit has been crossed 256k because iot_worker +stack size is 256k. 
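+
+The shape of the fix, shown outside GlusterFS with plain
+llistxattr(2) and malloc()/free() (GF_MALLOC/GF_FREE are the
+project's wrappers around these):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <sys/xattr.h>
+
+    int main(int argc, char **argv)
+    {
+        const char *path = argc > 1 ? argv[1] : ".";
+
+        ssize_t size = llistxattr(path, NULL, 0);  /* probe required size */
+        if (size <= 0)
+            return 0;                              /* no xattrs, or error */
+
+        char *list = malloc(size);                 /* heap, not the stack */
+        if (!list)
+            return 1;
+
+        size = llistxattr(path, list, size);
+        if (size > 0)
+            printf("xattr name list occupies %zd bytes\n", size);
+
+        free(list);    /* unlike alloca(), this must be freed explicitly */
+        return 0;
+    }
+
+With alloca() the same probe result is carved out of the calling
+thread's stack, which overflows once the list approaches the 256k
+iot_worker stack size.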
+ +> Fixes: #1699 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd +> (Cherry pick from commit fd666caa35ac84dd1cba55399761982011b77112) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/pull/1828) + +Change-Id: I100468234f83329a7d65b43cbe4e10450c1ccecd +Bug: 1903468 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220872 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/storage/posix/src/posix-gfid-path.c | 5 ++++- + xlators/storage/posix/src/posix-helpers.c | 3 ++- + xlators/storage/posix/src/posix-inode-fd-ops.c | 12 +++++++++--- + 3 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c +index 64b5c6c..01315ac 100644 +--- a/xlators/storage/posix/src/posix-gfid-path.c ++++ b/xlators/storage/posix/src/posix-gfid-path.c +@@ -195,7 +195,8 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, + if (size == 0) + goto done; + } +- list = alloca(size); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + *op_errno = errno; + goto err; +@@ -309,6 +310,7 @@ done: + GF_FREE(paths[j]); + } + ret = 0; ++ GF_FREE(list); + return ret; + err: + if (path) +@@ -317,5 +319,6 @@ err: + if (paths[j]) + GF_FREE(paths[j]); + } ++ GF_FREE(list); + return ret; + } +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index 73a44be..ceac52a 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -349,7 +349,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) + goto out; + } + +- list = alloca(size); ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + goto out; + } +@@ -379,6 +379,7 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) + ret = 0; + + out: ++ GF_FREE(list); + return ret; + } + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 21119ea..1d37aed 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -3305,7 +3305,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + goto out; + } + +- list = alloca(size); ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + *op_errno = errno; + goto out; +@@ -3385,6 +3385,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + op_ret = 0; + + out: ++ GF_FREE(list); + return op_ret; + } + +@@ -3810,7 +3811,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + if (size == 0) + goto done; + } +- list = alloca(size); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + op_errno = errno; + goto out; +@@ -3937,6 +3939,7 @@ out: + dict_unref(dict); + } + ++ GF_FREE(list); + return 0; + } + +@@ -4136,7 +4139,8 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + if (size == 0) + goto done; + } +- list = alloca(size + 1); ++ ++ list = GF_MALLOC(size, gf_posix_mt_char); + if (!list) { + op_ret = -1; + op_errno = ENOMEM; +@@ -4240,6 +4244,8 @@ out: + if (dict) + dict_unref(dict); + ++ GF_FREE(list); ++ + return 0; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch 
b/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch new file mode 100644 index 0000000..9d477ae --- /dev/null +++ b/SOURCES/0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch @@ -0,0 +1,80 @@ +From 85a5cce40dba0393e636c0eb5af9d8f8746f2315 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawal@redhat.com> +Date: Thu, 2 Jan 2020 10:23:52 +0530 +Subject: [PATCH 497/511] socket: Use AES128 cipher in SSL if AES is supported + by CPU + +SSL performance is improved after configuring AES128 cipher +so use AES128 cipher as a default cipher on the CPU those +enabled AES bits otherwise ssl use AES256 cipher + +> Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506 +> Fixes: #1050 +> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +> (Cherry pick from commit 177cc09d24515596eb51739ce0a276c26e3c52f1) +> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/23952/) + +Change-Id: I91c50fe987cbb22ed76f8012094730c592c63506 +Bug: 1612973 +Signed-off-by: Mohit Agrawal <moagrawal@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220870 +Tested-by: Mohit Agrawal <moagrawa@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + rpc/rpc-transport/socket/src/socket.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c +index 54cd5df..1ee7320 100644 +--- a/rpc/rpc-transport/socket/src/socket.c ++++ b/rpc/rpc-transport/socket/src/socket.c +@@ -4238,6 +4238,34 @@ static void __attribute__((destructor)) fini_openssl_mt(void) + ERR_free_strings(); + } + ++/* The function returns 0 if AES bit is enabled on the CPU */ ++static int ++ssl_check_aes_bit(void) ++{ ++ FILE *fp = fopen("/proc/cpuinfo", "r"); ++ int ret = 1; ++ size_t len = 0; ++ char *line = NULL; ++ char *match = NULL; ++ ++ GF_ASSERT(fp != NULL); ++ ++ while (getline(&line, &len, fp) > 0) { ++ if (!strncmp(line, "flags", 5)) { ++ match = strstr(line, " aes"); ++ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) { ++ ret = 0; ++ break; ++ } ++ } ++ } ++ ++ free(line); ++ fclose(fp); ++ ++ return ret; ++} ++ + static int + ssl_setup_connection_params(rpc_transport_t *this) + { +@@ -4261,6 +4289,10 @@ ssl_setup_connection_params(rpc_transport_t *this) + return 0; + } + ++ if (!ssl_check_aes_bit()) { ++ cipher_list = "AES128:" DEFAULT_CIPHER_LIST; ++ } ++ + priv->ssl_own_cert = DEFAULT_CERT_PATH; + if (dict_get_str(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) { + if (!priv->ssl_enabled) { +-- +1.8.3.1 + diff --git a/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch b/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch new file mode 100644 index 0000000..078c390 --- /dev/null +++ b/SOURCES/0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch @@ -0,0 +1,69 @@ +From 11d648660b8bd246756f87b2f40c72fbabf084d1 Mon Sep 17 00:00:00 2001 +From: Sunny Kumar <sunkumar@redhat.com> +Date: Tue, 19 May 2020 16:13:01 +0100 +Subject: [PATCH 498/511] geo-rep: Fix corner case in rename on mkdir during + hybrid crawl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem: +The issue is being hit during hybrid mode while handling rename on slave. +In this special case the rename is recorded as mkdir and geo-rep process it +by resolving the path form backend. 
+ +While resolving the backend path during this special handling one corner case is not considered. + +<snip> +Traceback (most recent call last): + File "/usr/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in worker + res = getattr(self.obj, rmeth)(*in_data[2:]) + File "/usr/libexec/glusterfs/python/syncdaemon/resource.py", line 588, in entry_ops + src_entry = get_slv_dir_path(slv_host, slv_volume, gfid) + File "/usr/libexec/glusterfs/python/syncdaemon/syncdutils.py", line 710, in get_slv_dir_path + dir_entry = os.path.join(pfx, pargfid, basename) + File "/usr/lib64/python2.7/posixpath.py", line 75, in join + if b.startswith('/'): +AttributeError: 'int' object has no attribute 'startswith' + +In pyhthon3: +Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "/usr/lib64/python3.8/posixpath.py", line 90, in join + genericpath._check_arg_types('join', a, *p) + File "/usr/lib64/python3.8/genericpath.py", line 152, in _check_arg_types + raise TypeError(f'{funcname}() argument must be str, bytes, or ' +TypeError: join() argument must be str, bytes, or os.PathLike object, not 'int' +</snip> + +>Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332 +>Fixes: #1250 +>Signed-off-by: Sunny Kumar <sunkumar@redhat.com> +Upstream Patch: https://review.gluster.org/c/glusterfs/+/24468/ + +BUG: 1835229 +Change-Id: I8b926899c60ad8c4ffc886d57028ba70fd21e332 +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220867 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + geo-replication/syncdaemon/syncdutils.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py +index d5a94d4..26c79d0 100644 +--- a/geo-replication/syncdaemon/syncdutils.py ++++ b/geo-replication/syncdaemon/syncdutils.py +@@ -732,6 +732,8 @@ def get_slv_dir_path(slv_host, slv_volume, gfid): + else: + dirpath = dirpath.strip("/") + pargfid = get_gfid_from_mnt(dirpath) ++ if isinstance(pargfid, int): ++ return None + dir_entry = os.path.join(pfx, pargfid, basename) + return dir_entry + +-- +1.8.3.1 + diff --git a/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch b/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch new file mode 100644 index 0000000..edeca1a --- /dev/null +++ b/SOURCES/0499-gfapi-give-appropriate-error-when-size-exceeds.patch @@ -0,0 +1,63 @@ +From f78a5d86c55149d80b6efdf60eae7221c238654e Mon Sep 17 00:00:00 2001 +From: Rinku Kothiya <rkothiya@redhat.com> +Date: Thu, 24 Sep 2020 12:43:51 +0000 +Subject: [PATCH 499/511] gfapi: give appropriate error when size exceeds + +This patch help generate appropriate error message +when the gfapi tries to write data equal to or +greater than 1 Gb due to the limitation at the +socket layer. 
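+
+A sketch of the guard being added (UNIT_GB mirrors the project's
+GF_UNIT_GB constant; the real check lives in glfs_pwritev_common()):
+
+    #include <errno.h>
+    #include <stdio.h>
+    #include <sys/uio.h>
+
+    #define UNIT_GB (1024ULL * 1024 * 1024)
+
+    static int check_write_size(const struct iovec *iov)
+    {
+        if (iov->iov_len >= UNIT_GB) {
+            errno = EINVAL;       /* fail fast with a clear errno */
+            return -1;
+        }
+        return 0;
+    }
+
+    int main(void)
+    {
+        struct iovec iov = { .iov_base = NULL, .iov_len = UNIT_GB };
+
+        if (check_write_size(&iov) == -1)
+            perror("write rejected");   /* prints "Invalid argument" */
+        return 0;
+    }
+
+Callers that need to move 1 GB or more in one request are expected
+to split the buffer into smaller writes.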
+ +Upstream: +> Reviewed-on: https://github.com/gluster/glusterfs/pull/1557 +> fixes: #1518 +> Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f +> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> + +BUG: 1691320 +Change-Id: I1234a0b5a6e675a0b20c6b1afe0f4390fd721f6f +Signed-off-by: Rinku Kothiya <rkothiya@redhat.com> +Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/219998 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +--- + api/src/gfapi-messages.h | 4 +++- + api/src/glfs-fops.c | 8 ++++++++ + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h +index 68d1242..2ffd5ac 100644 +--- a/api/src/gfapi-messages.h ++++ b/api/src/gfapi-messages.h +@@ -49,6 +49,8 @@ GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED, + API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED, + API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED, + API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED, +- API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED); ++ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED, ++ API_MSG_INVALID_ARG); + ++#define API_MSG_INVALID_ARG_STR "Invalid" + #endif /* !_GFAPI_MESSAGES_H__ */ +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index e6adea5..051541f 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -1525,6 +1525,14 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + + GF_REF_GET(glfd); + ++ if (iovec->iov_len >= GF_UNIT_GB) { ++ ret = -1; ++ errno = EINVAL; ++ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG, ++ "size >= %llu is not allowed", GF_UNIT_GB, NULL); ++ goto out; ++ } ++ + subvol = glfs_active_subvol(glfd->fs); + if (!subvol) { + ret = -1; +-- +1.8.3.1 + diff --git a/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch b/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch new file mode 100644 index 0000000..4898422 --- /dev/null +++ b/SOURCES/0500-features-shard-Convert-shard-block-indices-to-uint64.patch @@ -0,0 +1,104 @@ +From 60789c658ea22063c26168cb4ce15ac5fd279e58 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Mon, 14 Dec 2020 10:57:03 +0530 +Subject: [PATCH 500/511] features/shard: Convert shard block indices to uint64 + +This patch fixes a crash in FOPs that operate on really large sharded +files where number of participant shards could sometimes exceed +signed int32 max. + +The patch also adds GF_ASSERTs to ensure that number of participating +shards is always greater than 0 for files that do have more than one +shard. 
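+
+The arithmetic that overflows is easy to reproduce; with a 4 MiB
+shard block size (illustrative — it is the smallest the option
+allows) a file in the petabyte range already produces block indices
+beyond INT32_MAX:
+
+    #include <inttypes.h>
+    #include <stdint.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        uint64_t block_size = 4ULL * 1024 * 1024;   /* 4 MiB shards */
+        uint64_t file_size  = 10ULL << 50;          /* 10 PiB file */
+
+        uint64_t last_block = (file_size - 1) / block_size;
+        int32_t  truncated  = (int32_t)last_block;  /* the old field type */
+
+        printf("last_block=%" PRIu64 ", as int32=%" PRId32 "\n",
+               last_block, truncated);              /* int32 goes negative */
+        return 0;
+    }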
+ +Upstream: +> https://review.gluster.org/#/c/glusterfs/+/23407/ +> Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 +> Fixes: #1348 +> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> + +BUG: 1752739 +Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 +Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221061 +Tested-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + xlators/features/shard/src/shard.c | 14 ++++++++------ + xlators/features/shard/src/shard.h | 6 +++--- + 2 files changed, 11 insertions(+), 9 deletions(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index 16d557b..a967f35 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1855,10 +1855,9 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + */ + if (!inode) { + gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent" +- " in backend: %s. Directly proceeding to update " +- "file size", +- uuid_utoa(inode->gfid)); ++ "Last shard to be truncated absent in backend: " PRIu64 ++ " of gfid: %s. Directly proceeding to update file size", ++ local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +@@ -2389,6 +2388,7 @@ int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { + get_highest_block(0, local->prebuf.ia_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = + (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; + +@@ -4809,6 +4809,7 @@ int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { + get_highest_block(local->offset, local->total_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = local->loc.inode; + + local->inode_list = +@@ -5266,6 +5267,7 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + local->last_block = + get_highest_block(local->offset, local->total_size, local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; ++ GF_ASSERT(local->num_blocks > 0); + local->inode_list = + GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) { +@@ -5274,8 +5276,8 @@ int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + } + + gf_msg_trace( +- this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " +- "last_block=%" PRIu32 " num_blocks=%" PRIu32 ++ this->name, 0, "%s: gfid=%s first_block=%" PRIu64 " " ++ "last_block=%" PRIu64 " num_blocks=%" PRIu64 + " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", + gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, local->offset, +diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h +index 1721417..4fe181b 100644 +--- a/xlators/features/shard/src/shard.h ++++ b/xlators/features/shard/src/shard.h +@@ -254,9 +254,9 @@ typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame, + typedef struct shard_local { + int op_ret; + int op_errno; +- int first_block; +- int last_block; +- int num_blocks; ++ uint64_t first_block; ++ uint64_t last_block; ++ uint64_t num_blocks; + int call_count; + int eexist_count; + int create_count; +-- +1.8.3.1 + diff --git a/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch b/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch new file mode 100644 index 0000000..5152df8 --- /dev/null +++ b/SOURCES/0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch @@ -0,0 +1,48 @@ +From 070698ede9c3765c95364e8207c8311dbf895499 Mon Sep 17 00:00:00 2001 +From: kiyer <kiyer@redhat.com> +Date: Tue, 8 Dec 2020 15:18:49 +0530 +Subject: [PATCH 501/511] Cli: Removing old syntax of tier cmds from help menu + +Remove old syntax of attach-tier and detach-tier +commands from help menu. + +Label: DOWNSTREAM ONLY +BUG: 1813866 + +Change-Id: If86e4828b475fb593a5105ca8deac96374f9542d +Signed-off-by: kiyer <kiyer@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220510 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + cli/src/cli-cmd-volume.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index 6f5bf8b..b6bef80 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -3331,19 +3331,6 @@ struct cli_cmd tier_cmds[] = { + {"volume tier <VOLNAME> detach <start|stop|status|commit|[force]>", + cli_cmd_volume_tier_cbk, "Detach the hot tier from <VOLNAME>"}, + +- {"volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...", +- cli_cmd_volume_tier_cbk, +- "NOTE: this is old syntax, will be deprecated in next release. 
" +- "Please use gluster volume tier <vol> attach " +- "[<replica COUNT>] <NEW-BRICK>..."}, +- +- {"volume detach-tier <VOLNAME> " +- "<start|stop|status|commit|force>", +- cli_cmd_volume_tier_cbk, +- "NOTE: this is old syntax, will be deprecated in next release. " +- "Please use gluster volume tier <vol> detach " +- "{start|stop|commit} [force]"}, +- + {"volume tier <VOLNAME> status\n" + "volume tier <VOLNAME> start [force]\n" + "volume tier <VOLNAME> stop\n" +-- +1.8.3.1 + diff --git a/SOURCES/0502-dht-fixing-a-permission-update-issue.patch b/SOURCES/0502-dht-fixing-a-permission-update-issue.patch new file mode 100644 index 0000000..7c136d0 --- /dev/null +++ b/SOURCES/0502-dht-fixing-a-permission-update-issue.patch @@ -0,0 +1,225 @@ +From 3f1eee125a35c33ecb078e5d3bfd80d80e63881d Mon Sep 17 00:00:00 2001 +From: Barak Sason Rofman <bsasonro@redhat.com> +Date: Wed, 15 Jan 2020 12:02:05 +0200 +Subject: [PATCH 502/511] dht - fixing a permission update issue + +When bringing back a downed brick and performing lookup from the client +side, the permission on said brick aren't updated on the first lookup, +but only on the second. + +This patch modifies permission update logic so the first lookup will +trigger a permission update on the downed brick. + +LIMITATIONS OF THE PATCH: +As the choice of source depends on whether the directory has layout or not. +Even the directories on the newly added brick will have layout xattr[zeroed], but the same is not true for a root directory. +Hence, in case in the entire cluster only the newly added bricks are up [and others are down], then any change in permission during this time will be overwritten by the older permissions when the cluster is restarted. + +Upstream: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24020/ +> fixes: #999 +> Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33 +> Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> + +BUG: 1663821 +Change-Id: Ieb70246d41e59f9cae9f70bc203627a433dfbd33 +Signed-off-by: Barak Sason Rofman <bsasonro@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221116 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/bug-1064147.t | 71 ++++++++++++++++++++++++++++++++ + xlators/cluster/dht/src/dht-common.c | 28 ++++++++++--- + xlators/cluster/dht/src/dht-selfheal.c | 15 +++++-- + xlators/storage/posix/src/posix-common.c | 16 +++---- + 4 files changed, 111 insertions(+), 19 deletions(-) + create mode 100755 tests/bugs/bug-1064147.t + +diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t +new file mode 100755 +index 0000000..617a1aa +--- /dev/null ++++ b/tests/bugs/bug-1064147.t +@@ -0,0 +1,71 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++# Initialize ++#------------------------------------------------------------ ++cleanup; ++ ++# Start glusterd ++TEST glusterd; ++TEST pidof glusterd; ++TEST $CLI volume info; ++ ++# Create a volume ++TEST $CLI volume create $V0 $H0:/${V0}{1,2}; ++ ++# Verify volume creation ++ EXPECT "$V0" volinfo_field $V0 'Volume Name'; ++ EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++# Start volume and verify successful start ++ TEST $CLI volume start $V0; ++ EXPECT 'Started' volinfo_field $V0 'Status'; ++ TEST glusterfs -s $H0 --volfile-id=$V0 $M0 ++#------------------------------------------------------------ ++ ++# Test case 1 - Subvolume down + Healing ++#------------------------------------------------------------ ++# Kill 2nd brick process ++TEST kill -9 `ps aux | grep glusterfsd | grep ${V0}2 | grep -v grep | awk '{print $2}'`; ++ ++# Change root permissions ++TEST chmod 444 $M0 ++ ++# Store permission for comparision ++TEST permission_new=`stat -c "%A" $M0` ++ ++# Bring up the killed brick process ++TEST $CLI volume start $V0 force ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0 ++ ++# Check brick permissions ++TEST brick_perm=`stat -c "%A" /${V0}2` ++TEST [ ${brick_perm} = ${permission_new} ] ++#------------------------------------------------------------ ++ ++# Test case 2 - Add-brick + Healing ++#------------------------------------------------------------ ++# Change root permissions ++TEST chmod 777 $M0 ++ ++# Store permission for comparision ++TEST permission_new_2=`stat -c "%A" $M0` ++ ++# Add a 3rd brick ++TEST $CLI volume add-brick $V0 $H0:/${V0}3 ++ ++# Perform lookup ++sleep 5 ++TEST ls $M0 ++ ++# Check permissions on the new brick ++TEST brick_perm2=`stat -c "%A" /${V0}3` ++ ++TEST [ ${brick_perm2} = ${permission_new_2} ] ++ ++cleanup; +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index 4db89df..fe1d0ee 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -1363,13 +1363,29 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + dht_aggregate_xattr(local->xattr, xattr); + } + ++ if (__is_root_gfid(stbuf->ia_gfid)) { ++ ret = dht_dir_has_layout(xattr, conf->xattr_name); ++ if (ret >= 0) { ++ if (is_greater_time(local->prebuf.ia_ctime, ++ local->prebuf.ia_ctime_nsec, ++ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) { ++ /* Choose source */ ++ local->prebuf.ia_gid = stbuf->ia_gid; ++ local->prebuf.ia_uid = stbuf->ia_uid; ++ ++ local->prebuf.ia_ctime = stbuf->ia_ctime; ++ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec; ++ local->prebuf.ia_prot = stbuf->ia_prot; ++ } ++ } ++ } ++ + if (local->stbuf.ia_type != IA_INVAL) { + /* This is not the first subvol to respond */ +- if (!__is_root_gfid(stbuf->ia_gfid) && +- ((local->stbuf.ia_gid != stbuf->ia_gid) || +- (local->stbuf.ia_uid != stbuf->ia_uid) || +- (is_permission_different(&local->stbuf.ia_prot, +- &stbuf->ia_prot)))) { ++ if ((local->stbuf.ia_gid != stbuf->ia_gid) || ++ (local->stbuf.ia_uid != stbuf->ia_uid) || ++ (is_permission_different(&local->stbuf.ia_prot, ++ &stbuf->ia_prot))) { + local->need_attrheal = 1; + } + } +@@ -10969,7 +10985,7 @@ dht_notify(xlator_t *this, int event, void *data, ...) 
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || + (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) +- gf_defrag_status_get(conf, output, _gf_false); ++ gf_defrag_status_get(conf, output, _gf_false); + else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) + gf_defrag_start_detach_tier(defrag); + else if (cmd == GF_DEFRAG_CMD_DETACH_START) +diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c +index f5dfff9..f4e17d1 100644 +--- a/xlators/cluster/dht/src/dht-selfheal.c ++++ b/xlators/cluster/dht/src/dht-selfheal.c +@@ -2097,9 +2097,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref(this, layout); + +- if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { +- /*Use the one in the mds_stbuf*/ +- local->stbuf = local->mds_stbuf; ++ if (local->need_attrheal) { ++ if (__is_root_gfid(local->stbuf.ia_gfid)) { ++ local->stbuf.ia_gid = local->prebuf.ia_gid; ++ local->stbuf.ia_uid = local->prebuf.ia_uid; ++ ++ local->stbuf.ia_ctime = local->prebuf.ia_ctime; ++ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec; ++ local->stbuf.ia_prot = local->prebuf.ia_prot; ++ ++ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) { ++ local->stbuf = local->mds_stbuf; ++ } + } + + if (!__is_root_gfid(local->stbuf.ia_gfid)) { +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index c5a43a1..e5c6e62 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -598,6 +598,7 @@ posix_init(xlator_t *this) + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; ++ char value; + + dir_data = dict_get(this->options, "directory"); + +@@ -654,16 +655,11 @@ posix_init(xlator_t *this) + } + + /* Check for Extended attribute support, if not present, log it */ +- op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", +- 8, 0); +- if (op_ret != -1) { +- ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); +- if (ret) { +- gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, +- "failed to remove xattr: " +- "trusted.glusterfs.test"); +- } +- } else { ++ size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); ++ ++ if ((size == -1) && (errno == EOPNOTSUPP)) { ++ gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, ++ "getxattr returned %zd", size); + tmp_data = dict_get(this->options, "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { +-- +1.8.3.1 + diff --git a/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch b/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch new file mode 100644 index 0000000..466bf4e --- /dev/null +++ b/SOURCES/0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch @@ -0,0 +1,179 @@ +From 5946a6ec18976c0f52162fe0f47e9b5171af87ec Mon Sep 17 00:00:00 2001 +From: Soumya Koduri <skoduri@redhat.com> +Date: Mon, 6 Apr 2020 12:36:44 +0530 +Subject: [PATCH 503/511] gfapi: Suspend synctasks instead of blocking them + +There are certain conditions which blocks the current +execution thread (like waiting on mutex lock or condition +variable or I/O response). 
In such cases, if it is a +synctask thread, we should suspend the task instead +of blocking it (like done in SYNCOP using synctask_yield) + +This is to avoid deadlock like the one mentioned below - + +1) synctaskA sets fs->migration_in_progress to 1 and + does I/O (LOOKUP) +2) Other synctask threads wait for fs->migration_in_progress + to be reset to 0 by synctaskA and hence blocked +3) but synctaskA cannot resume as all synctask threads are blocked + on (2). + +Note: this same approach is already used by few other components +like syncbarrier etc. + +>Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d +>Fixes: #1146 +>Signed-off-by: Soumya Koduri <skoduri@redhat.com> +Upstream patch: https://review.gluster.org/c/glusterfs/+/24276 + +BUG: 1779238 +Change-Id: If90f870d663bb242c702a5b86ac52eeda67c6f0d +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221081 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +--- + api/src/glfs-internal.h | 34 ++++++++++++++++++++++++++++++++-- + api/src/glfs-resolve.c | 9 +++++++++ + api/src/glfs.c | 9 +++++++++ + 3 files changed, 50 insertions(+), 2 deletions(-) + +diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h +index 55401b2..15cf0ee 100644 +--- a/api/src/glfs-internal.h ++++ b/api/src/glfs-internal.h +@@ -16,6 +16,7 @@ + #include <glusterfs/upcall-utils.h> + #include "glfs-handles.h" + #include <glusterfs/refcount.h> ++#include <glusterfs/syncop.h> + + #define GLFS_SYMLINK_MAX_FOLLOW 2048 + +@@ -207,6 +208,7 @@ struct glfs { + glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */ + void *up_data; /* Opaque data provided by application + * during upcall registration */ ++ struct list_head waitq; /* waiting synctasks */ + }; + + /* This enum is used to maintain the state of glfd. In case of async fops +@@ -442,6 +444,34 @@ glfs_process_upcall_event(struct glfs *fs, void *data) + THIS = glfd->fd->inode->table->xl->ctx->master; \ + } while (0) + ++#define __GLFS_LOCK_WAIT(fs) \ ++ do { \ ++ struct synctask *task = NULL; \ ++ \ ++ task = synctask_get(); \ ++ \ ++ if (task) { \ ++ list_add_tail(&task->waitq, &fs->waitq); \ ++ pthread_mutex_unlock(&fs->mutex); \ ++ synctask_yield(task, NULL); \ ++ pthread_mutex_lock(&fs->mutex); \ ++ } else { \ ++ /* non-synctask */ \ ++ pthread_cond_wait(&fs->cond, &fs->mutex); \ ++ } \ ++ } while (0) ++ ++#define __GLFS_SYNCTASK_WAKE(fs) \ ++ do { \ ++ struct synctask *waittask = NULL; \ ++ \ ++ while (!list_empty(&fs->waitq)) { \ ++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \ ++ list_del_init(&waittask->waitq); \ ++ synctask_wake(waittask); \ ++ } \ ++ } while (0) ++ + /* + By default all lock attempts from user context must + use glfs_lock() and glfs_unlock(). 
This allows +@@ -466,10 +496,10 @@ glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration) + pthread_mutex_lock(&fs->mutex); + + while (!fs->init) +- pthread_cond_wait(&fs->cond, &fs->mutex); ++ __GLFS_LOCK_WAIT(fs); + + while (wait_for_migration && fs->migration_in_progress) +- pthread_cond_wait(&fs->cond, &fs->mutex); ++ __GLFS_LOCK_WAIT(fs); + + return 0; + } +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index 062b7dc..58b6ace 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -65,6 +65,9 @@ __glfs_first_lookup(struct glfs *fs, xlator_t *subvol) + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return ret; + } + +@@ -154,6 +157,9 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode, + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return newinode; + } + +@@ -841,6 +847,9 @@ __glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd) + fs->migration_in_progress = 0; + pthread_cond_broadcast(&fs->cond); + ++ /* wake up other waiting tasks */ ++ __GLFS_SYNCTASK_WAKE(fs); ++ + return newfd; + } + +diff --git a/api/src/glfs.c b/api/src/glfs.c +index f36616d..ae994fa 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -740,6 +740,7 @@ glfs_new_fs(const char *volname) + + INIT_LIST_HEAD(&fs->openfds); + INIT_LIST_HEAD(&fs->upcall_list); ++ INIT_LIST_HEAD(&fs->waitq); + + PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX, + err); +@@ -1228,6 +1229,7 @@ pub_glfs_fini(struct glfs *fs) + call_pool_t *call_pool = NULL; + int fs_init = 0; + int err = -1; ++ struct synctask *waittask = NULL; + + DECLARE_OLD_THIS; + +@@ -1249,6 +1251,13 @@ pub_glfs_fini(struct glfs *fs) + + call_pool = fs->ctx->pool; + ++ /* Wake up any suspended synctasks */ ++ while (!list_empty(&fs->waitq)) { ++ waittask = list_entry(fs->waitq.next, struct synctask, waitq); ++ list_del_init(&waittask->waitq); ++ synctask_wake(waittask); ++ } ++ + while (countdown--) { + /* give some time for background frames to finish */ + pthread_mutex_lock(&fs->mutex); +-- +1.8.3.1 + diff --git a/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch b/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch new file mode 100644 index 0000000..21d7f7f --- /dev/null +++ b/SOURCES/0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch @@ -0,0 +1,109 @@ +From baa566be8832a56fdea7068d84844ec1ec84d8d9 Mon Sep 17 00:00:00 2001 +From: mohit84 <moagrawa@redhat.com> +Date: Thu, 15 Oct 2020 16:28:58 +0530 +Subject: [PATCH 504/511] io-stats: Configure ios_sample_buf_size based on + sample_interval value (#1574) + +io-stats xlator declares a ios_sample_buf_size 64k object(10M) per xlator +but in case of sample_interval is 0 this big buffer is not required so +declare the default value only while sample_interval is not 0.The new +change would be helpful to reduce RSS size for a brick and shd process +while the number of volumes are huge. 
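+
+The saving comes from choosing the ring-buffer length at init time;
+a simplified model of the logic (struct sample stands in for the
+xlator's sample record type):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct sample { long fop; long elapsed; };
+
+    /* interval 0 means sampling is off, so a tiny buffer suffices */
+    static size_t sample_buf_len(int interval, size_t configured)
+    {
+        return interval ? configured : 1024;
+    }
+
+    int main(void)
+    {
+        size_t n = sample_buf_len(0, 64 * 1024);
+        struct sample *buf = calloc(n, sizeof(*buf));
+
+        if (!buf)
+            return 1;
+        printf("allocated %zu sample slots instead of 65536\n", n);
+        free(buf);
+        return 0;
+    }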
+ +> Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8 +> Fixes: #1542 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry picked from commit f71660eb879a9cd5761e5adbf10c783e959a990a) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1542) + +Change-Id: I3e82cca92e40549355edfac32580169f3ce51af8 +BUG: 1898778 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221183 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/bugs/glusterd/daemon-log-level-option.t | 8 ++++---- + xlators/debug/io-stats/src/io-stats.c | 26 ++++++++++++++++++++++---- + 2 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/tests/bugs/glusterd/daemon-log-level-option.t b/tests/bugs/glusterd/daemon-log-level-option.t +index 66e55e3..5352a63 100644 +--- a/tests/bugs/glusterd/daemon-log-level-option.t ++++ b/tests/bugs/glusterd/daemon-log-level-option.t +@@ -61,8 +61,8 @@ rm -f /var/log/glusterfs/glustershd.log + TEST $CLI volume set all cluster.daemon-log-level WARNING + TEST $CLI volume start $V0 + +-# log should not have any info messages +-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log" ++# log does have 1 info message specific to configure ios_sample_buf_size in io-stats xlator ++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log" + + # log should not have any debug messages + EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log" +@@ -78,8 +78,8 @@ rm -f /var/log/glusterfs/glustershd.log + TEST $CLI volume set all cluster.daemon-log-level ERROR + TEST $CLI volume start $V0 + +-# log should not have any info messages +-EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log" ++# log does have 1 info message specific to configure ios_sample_buf_size in io-stats xlator ++EXPECT 1 Info_messages_count "/var/log/glusterfs/glustershd.log" + + # log should not have any warning messages + EXPECT 0 Warning_messages_count "/var/log/glusterfs/glustershd.log" +diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c +index aa91a0a..9b34895 100644 +--- a/xlators/debug/io-stats/src/io-stats.c ++++ b/xlators/debug/io-stats/src/io-stats.c +@@ -3724,6 +3724,15 @@ xlator_set_loglevel(xlator_t *this, int log_level) + } + } + ++void ++ios_sample_buf_size_configure(char *name, struct ios_conf *conf) ++{ ++ conf->ios_sample_buf_size = 1024; ++ gf_log(name, GF_LOG_INFO, ++ "Configure ios_sample_buf " ++ " size is 1024 because ios_sample_interval is 0"); ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -3779,8 +3788,13 @@ reconfigure(xlator_t *this, dict_t *options) + int32, out); + GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out); + ios_set_log_format_code(conf, dump_format_str); +- GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options, +- int32, out); ++ if (conf->ios_sample_interval) { ++ GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, ++ options, int32, out); ++ } else { ++ ios_sample_buf_size_configure(this->name, conf); ++ } ++ + GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out); + if (sys_log_str) { + sys_log_level = glusterd_check_log_level(sys_log_str); +@@ -3947,8 +3961,12 @@ init(xlator_t *this) + GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out); + ios_set_log_format_code(conf, dump_format_str); + +- GF_OPTION_INIT("ios-sample-buf-size", 
conf->ios_sample_buf_size, int32, +- out); ++ if (conf->ios_sample_interval) { ++ GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32, ++ out); ++ } else { ++ ios_sample_buf_size_configure(this->name, conf); ++ } + + ret = ios_init_sample_buf(conf); + if (ret) { +-- +1.8.3.1 + diff --git a/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch b/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch new file mode 100644 index 0000000..a0f6b62 --- /dev/null +++ b/SOURCES/0505-trash-Create-inode_table-only-while-feature-is-enabl.patch @@ -0,0 +1,107 @@ +From 43a8e2c7441b14f5f238cb11d83f32f248b16abb Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Tue, 13 Oct 2020 18:56:20 +0530 +Subject: [PATCH 505/511] trash: Create inode_table only while feature is + enabled + +Currently trash xlator create a inode table(1M) even if +feature is not enabled.In brick_mux environment while 250 +bricks are attached with a single brick process and feature +is not enable brick process increase RSS size unnecessarily. + +Solution: Create inode_table only while a feature is enabled. +The patch reduces 250M RSS size per brick process +if trash feature is not enabled. + +> Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d +> Fixes: #1543 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit 32f25e7b1b4b080ab2640e178b407c878e629376) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1543) + +Change-Id: I11a6fd2b8419fe2988f398be6ec30fb4f3b99a5d +BUG: 1898781 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221184 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/trash/src/trash.c | 47 +++++++++++++++++++++++++++++++++++--- + 1 file changed, 44 insertions(+), 3 deletions(-) + +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index f96ed73..93f020f 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2235,16 +2235,47 @@ reconfigure(xlator_t *this, dict_t *options) + char trash_dir[PATH_MAX] = { + 0, + }; ++ gf_boolean_t active_earlier = _gf_false; ++ gf_boolean_t active_now = _gf_false; + + priv = this->private; + + GF_VALIDATE_OR_GOTO("trash", priv, out); + ++ active_earlier = priv->state; ++ GF_OPTION_RECONF("trash", active_now, options, bool, out); ++ ++ /* Disable of trash feature is not allowed at this point until ++ we are not able to find an approach to cleanup resource ++ gracefully. 
Here to disable the feature need to destroy inode ++ table and currently it is difficult to ensure inode is not ++ being used ++ */ ++ if (active_earlier && !active_now) { ++ gf_log(this->name, GF_LOG_INFO, ++ "Disable of trash feature is not allowed " ++ "during graph reconfigure"); ++ ret = 0; ++ goto out; ++ } ++ ++ if (!active_earlier && active_now) { ++ if (!priv->trash_itable) { ++ priv->trash_itable = inode_table_new(0, this); ++ if (!priv->trash_itable) { ++ ret = -ENOMEM; ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to create trash inode_table" ++ " during graph reconfigure"); ++ goto out; ++ } ++ } ++ priv->state = active_now; ++ } ++ + GF_OPTION_RECONF("trash-internal-op", priv->internal, options, bool, out); + GF_OPTION_RECONF("trash-dir", tmp, options, str, out); + +- GF_OPTION_RECONF("trash", priv->state, options, bool, out); +- + if (priv->state) { + ret = create_or_rename_trash_directory(this); + +@@ -2501,7 +2532,17 @@ init(xlator_t *this) + goto out; + } + +- priv->trash_itable = inode_table_new(0, this); ++ if (priv->state) { ++ priv->trash_itable = inode_table_new(0, this); ++ if (!priv->trash_itable) { ++ ret = -ENOMEM; ++ priv->state = _gf_false; ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to create trash inode_table disable trash"); ++ goto out; ++ } ++ } ++ + gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path); + + this->private = (void *)priv; +-- +1.8.3.1 + diff --git a/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch b/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch new file mode 100644 index 0000000..cf978f5 --- /dev/null +++ b/SOURCES/0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch @@ -0,0 +1,499 @@ +From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 17 00:00:00 2001 +From: mohit84 <moagrawa@redhat.com> +Date: Mon, 9 Nov 2020 17:15:42 +0530 +Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with + glusterfs_ctx (#1595) + +Currently posix xlator spawns posix_disk_space_threads per brick and in +case of brick_mux environment while glusterd attached bricks at maximum +level(250) with a single brick process in that case 250 threads are +spawned for all bricks and brick process memory size also increased. 
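+
+For scale, a toy model of the one-thread-per-process design the
+solution below adopts (names and the fixed brick list are
+illustrative only):
+
+    #include <pthread.h>
+    #include <stdio.h>
+    #include <unistd.h>
+
+    #define NBRICKS 3
+    static const char *bricks[NBRICKS] = { "/b1", "/b2", "/b3" };
+
+    /* one scanner walks every registered brick; the real thread
+     * sleeps on a condition variable and exits once the list empties */
+    static void *disk_scanner(void *arg)
+    {
+        (void)arg;
+        for (int round = 0; round < 2; round++) {
+            for (int i = 0; i < NBRICKS; i++)
+                printf("checking free space on %s\n", bricks[i]);
+            sleep(1);
+        }
+        return NULL;
+    }
+
+    int main(void)
+    {
+        pthread_t t;
+
+        pthread_create(&t, NULL, disk_scanner, NULL);
+        pthread_join(t, NULL);
+        return 0;
+    }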
+ +Solution: Attach a posix_disk_space thread with glusterfs_ctx to + spawn a thread per process basis instead of spawning a per brick + +> Fixes: #1482 +> Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 +> Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e +> Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> + +Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 +Bug: 1898776 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220366 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + glusterfsd/src/glusterfsd.c | 4 + + libglusterfs/src/glusterfs/glusterfs.h | 6 ++ + xlators/storage/posix/src/posix-common.c | 68 +++++++++++-- + xlators/storage/posix/src/posix-handle.h | 3 +- + xlators/storage/posix/src/posix-helpers.c | 131 ++++++++++++++----------- + xlators/storage/posix/src/posix-inode-fd-ops.c | 3 +- + xlators/storage/posix/src/posix-mem-types.h | 1 + + xlators/storage/posix/src/posix.h | 12 ++- + 8 files changed, 160 insertions(+), 68 deletions(-) + +diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c +index 955bf1d..ac25255 100644 +--- a/glusterfsd/src/glusterfsd.c ++++ b/glusterfsd/src/glusterfsd.c +@@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + INIT_LIST_HEAD(&cmd_args->xlator_options); + INIT_LIST_HEAD(&cmd_args->volfile_servers); + ctx->pxl_count = 0; ++ ctx->diskxl_count = 0; + pthread_mutex_init(&ctx->fd_lock, NULL); + pthread_cond_init(&ctx->fd_cond, NULL); + INIT_LIST_HEAD(&ctx->janitor_fds); ++ pthread_mutex_init(&ctx->xl_lock, NULL); ++ pthread_cond_init(&ctx->xl_cond, NULL); ++ INIT_LIST_HEAD(&ctx->diskth_xl); + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; +diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h +index bf6a987..d3400bf 100644 +--- a/libglusterfs/src/glusterfs/glusterfs.h ++++ b/libglusterfs/src/glusterfs/glusterfs.h +@@ -740,7 +740,13 @@ struct _glusterfs_ctx { + pthread_t janitor; + /* The variable is use to save total posix xlator count */ + uint32_t pxl_count; ++ uint32_t diskxl_count; + ++ /* List of posix xlator use by disk thread*/ ++ struct list_head diskth_xl; ++ pthread_mutex_t xl_lock; ++ pthread_cond_t xl_cond; ++ pthread_t disk_space_check; + char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ + }; + typedef struct _glusterfs_ctx glusterfs_ctx_t; +diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c +index e5c6e62..2c9030b 100644 +--- a/xlators/storage/posix/src/posix-common.c ++++ b/xlators/storage/posix/src/posix-common.c +@@ -138,6 +138,36 @@ posix_inode(xlator_t *this) + return 0; + } + ++static void ++delete_posix_diskxl(xlator_t *this) ++{ ++ struct posix_private *priv = this->private; ++ struct posix_diskxl *pxl = priv->pxl; ++ glusterfs_ctx_t *ctx = this->ctx; ++ uint32_t count = 1; ++ ++ if (pxl) { ++ pthread_mutex_lock(&ctx->xl_lock); ++ { ++ pxl->detach_notify = _gf_true; ++ while (pxl->is_use) ++ pthread_cond_wait(&pxl->cond, &ctx->xl_lock); ++ list_del_init(&pxl->list); ++ priv->pxl = NULL; ++ count = --ctx->diskxl_count; ++ if (count == 0) ++ pthread_cond_signal(&ctx->xl_cond); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); ++ pthread_cond_destroy(&pxl->cond); ++ GF_FREE(pxl); ++ if (count == 0) { ++ pthread_join(ctx->disk_space_check, NULL); 
++ ctx->disk_space_check = 0; ++ } ++ } ++} ++ + /** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +@@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) + } + pthread_mutex_unlock(&ctx->fd_lock); + ++ delete_posix_diskxl(this); ++ + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); +@@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = -1; ++ double old_disk_reserve = 0.0; + + priv = this->private; + +@@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) + " fallback to <hostname>:<export>"); + } + ++ old_disk_reserve = priv->disk_reserve; + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ +@@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options) + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + +- if (priv->disk_reserve) { ++ /* Delete a pxl object from a list of disk_reserve while something ++ is changed for reserve option during graph reconfigure ++ */ ++ if (old_disk_reserve != priv->disk_reserve) { ++ delete_posix_diskxl(this); ++ old_disk_reserve = 0; ++ } ++ ++ if (!old_disk_reserve && priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, +- "Getting disk space check from thread failed"); ++ "Getting disk space check from thread failed "); + goto out; + } + } +@@ -1008,13 +1050,13 @@ posix_init(xlator_t *this) + " fallback to <hostname>:<export>"); + } + +- _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; ++ pthread_cond_init(&_private->fd_cond, NULL); + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + +@@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this) + priv->health_check = 0; + } + +- if (priv->disk_space_check) { +- priv->disk_space_check_active = _gf_false; +- (void)gf_thread_cleanup_xint(priv->disk_space_check); +- priv->disk_space_check = 0; +- } +- + if (priv->janitor) { + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); +@@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this) + pthread_join(ctx->janitor, NULL); + } + ++ pthread_mutex_lock(&ctx->xl_lock); ++ { ++ count = --ctx->diskxl_count; ++ if (count == 0) ++ pthread_cond_signal(&ctx->xl_cond); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); ++ ++ if (count == 0) { ++ pthread_join(ctx->disk_space_check, NULL); ++ ctx->disk_space_check = 0; ++ } ++ + if (priv->fsyncer) { + (void)gf_thread_cleanup_xint(priv->fsyncer); + priv->fsyncer = 0; + } ++ + /*unlock brick dir*/ + if (priv->mount_lock) + (void)sys_closedir(priv->mount_lock); +diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h +index c4d7cb1..8e4c719 100644 +--- a/xlators/storage/posix/src/posix-handle.h ++++ b/xlators/storage/posix/src/posix-handle.h +@@ -206,5 +206,6 @@ int + posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata); + + void +-posix_disk_space_check(xlator_t *this); ++posix_disk_space_check(struct posix_private* priv); ++ + 
#endif /* !_POSIX_HANDLE_H */ +diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c +index ceac52a..110d383 100644 +--- a/xlators/storage/posix/src/posix-helpers.c ++++ b/xlators/storage/posix/src/posix-helpers.c +@@ -2284,9 +2284,8 @@ unlock: + } + + void +-posix_disk_space_check(xlator_t *this) ++posix_disk_space_check(struct posix_private *priv) + { +- struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; + double size = 0; +@@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t *this) + double totsz = 0; + double freesz = 0; + +- GF_VALIDATE_OR_GOTO(this->name, this, out); +- priv = this->private; +- GF_VALIDATE_OR_GOTO(this->name, priv, out); ++ GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); + + subvol_path = priv->base_path; + + op_ret = sys_statvfs(subvol_path, &buf); + + if (op_ret == -1) { +- gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, ++ gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on %s", subvol_path); + goto out; + } +@@ -2328,78 +2325,102 @@ out: + } + + static void * +-posix_disk_space_check_thread_proc(void *data) ++posix_ctx_disk_thread_proc(void *data) + { +- xlator_t *this = NULL; + struct posix_private *priv = NULL; ++ glusterfs_ctx_t *ctx = NULL; + uint32_t interval = 0; +- int ret = -1; +- +- this = data; +- priv = this->private; ++ struct posix_diskxl *pthis = NULL; ++ xlator_t *this = NULL; ++ struct timespec sleep_till = { ++ 0, ++ }; + ++ ctx = data; + interval = 5; +- gf_msg_debug(this->name, 0, +- "disk-space thread started, " ++ ++ gf_msg_debug("glusterfs_ctx", 0, ++ "Ctx disk-space thread started, " + "interval = %d seconds", + interval); +- while (1) { +- /* aborting sleep() is a request to exit this thread, sleep() +- * will normally not return when cancelled */ +- ret = sleep(interval); +- if (ret > 0) +- break; +- /* prevent thread errors while doing the health-check(s) */ +- pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- +- /* Do the disk-check.*/ +- posix_disk_space_check(this); +- if (!priv->disk_space_check_active) +- goto out; +- pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); +- } + +-out: +- gf_msg_debug(this->name, 0, "disk space check thread exiting"); +- LOCK(&priv->lock); ++ pthread_mutex_lock(&ctx->xl_lock); + { +- priv->disk_space_check_active = _gf_false; ++ while (ctx->diskxl_count > 0) { ++ list_for_each_entry(pthis, &ctx->diskth_xl, list) ++ { ++ pthis->is_use = _gf_true; ++ pthread_mutex_unlock(&ctx->xl_lock); ++ ++ THIS = this = pthis->xl; ++ priv = this->private; ++ ++ posix_disk_space_check(priv); ++ ++ pthread_mutex_lock(&ctx->xl_lock); ++ pthis->is_use = _gf_false; ++ /* Send a signal to posix_notify function */ ++ if (pthis->detach_notify) ++ pthread_cond_signal(&pthis->cond); ++ } ++ ++ timespec_now_realtime(&sleep_till); ++ sleep_till.tv_sec += 5; ++ (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock, ++ &sleep_till); ++ } + } +- UNLOCK(&priv->lock); ++ pthread_mutex_unlock(&ctx->xl_lock); + + return NULL; + } + + int +-posix_spawn_disk_space_check_thread(xlator_t *xl) ++posix_spawn_disk_space_check_thread(xlator_t *this) + { +- struct posix_private *priv = NULL; +- int ret = -1; ++ int ret = 0; ++ glusterfs_ctx_t *ctx = this->ctx; ++ struct posix_diskxl *pxl = NULL; ++ struct posix_private *priv = this->private; + +- priv = xl->private; ++ pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t); ++ if (!pxl) { ++ ret = -ENOMEM; ++ gf_log(this->name, GF_LOG_ERROR, ++ 
"Calloc is failed to allocate " ++ "memory for diskxl object"); ++ goto out; ++ } ++ pthread_cond_init(&pxl->cond, NULL); + +- LOCK(&priv->lock); ++ pthread_mutex_lock(&ctx->xl_lock); + { +- /* cancel the running thread */ +- if (priv->disk_space_check_active == _gf_true) { +- pthread_cancel(priv->disk_space_check); +- priv->disk_space_check_active = _gf_false; +- } ++ if (ctx->diskxl_count++ == 0) { ++ ret = gf_thread_create(&ctx->disk_space_check, NULL, ++ posix_ctx_disk_thread_proc, ctx, ++ "posixctxres"); + +- ret = gf_thread_create(&priv->disk_space_check, NULL, +- posix_disk_space_check_thread_proc, xl, +- "posix_reserve"); +- if (ret) { +- priv->disk_space_check_active = _gf_false; +- gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, +- "unable to setup disk space check thread"); +- goto unlock; ++ if (ret) { ++ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, ++ "spawning disk space check thread failed"); ++ ctx->diskxl_count--; ++ pthread_mutex_unlock(&ctx->xl_lock); ++ goto out; ++ } + } ++ pxl->xl = this; ++ priv->pxl = (void *)pxl; ++ list_add_tail(&pxl->list, &ctx->diskth_xl); ++ } ++ pthread_mutex_unlock(&ctx->xl_lock); + +- priv->disk_space_check_active = _gf_true; ++out: ++ if (ret) { ++ if (pxl) { ++ pthread_cond_destroy(&pxl->cond); ++ GF_FREE(pxl); ++ } + } +-unlock: +- UNLOCK(&priv->lock); + return ret; + } + +diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c +index 1d37aed..761e018 100644 +--- a/xlators/storage/posix/src/posix-inode-fd-ops.c ++++ b/xlators/storage/posix/src/posix-inode-fd-ops.c +@@ -37,6 +37,7 @@ + #include <fcntl.h> + #endif /* HAVE_LINKAT */ + ++#include "posix-handle.h" + #include <glusterfs/glusterfs.h> + #include <glusterfs/checksum.h> + #include <glusterfs/dict.h> +@@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + option behaviour + */ + if (priv->disk_reserve) +- posix_disk_space_check(this); ++ posix_disk_space_check(priv); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h +index 2253f38..bb4c56d 100644 +--- a/xlators/storage/posix/src/posix-mem-types.h ++++ b/xlators/storage/posix/src/posix-mem-types.h +@@ -20,6 +20,7 @@ enum gf_posix_mem_types_ { + gf_posix_mt_paiocb, + gf_posix_mt_inode_ctx_t, + gf_posix_mt_mdata_attr, ++ gf_posix_mt_diskxl_t, + gf_posix_mt_end + }; + #endif +diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h +index 07f367b..4be979c 100644 +--- a/xlators/storage/posix/src/posix.h ++++ b/xlators/storage/posix/src/posix.h +@@ -36,7 +36,6 @@ + #include <glusterfs/compat.h> + #include <glusterfs/timer.h> + #include "posix-mem-types.h" +-#include "posix-handle.h" + #include <glusterfs/call-stub.h> + + #ifdef HAVE_LIBAIO +@@ -138,6 +137,14 @@ struct posix_fd { + char _pad[4]; /* manual padding */ + }; + ++struct posix_diskxl { ++ pthread_cond_t cond; ++ struct list_head list; ++ xlator_t *xl; ++ gf_boolean_t detach_notify; ++ gf_boolean_t is_use; ++}; ++ + struct posix_private { + char *base_path; + int32_t base_path_length; +@@ -207,6 +214,7 @@ struct posix_private { + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; ++ pthread_cond_t disk_cond; + int fsync_queue_count; + + enum { +@@ -233,7 +241,6 @@ struct posix_private { + char disk_unit; + uint32_t disk_space_full; + pthread_t 
disk_space_check; +- gf_boolean_t disk_space_check_active; + + #ifdef GF_DARWIN_HOST_OS + enum { +@@ -263,6 +270,7 @@ struct posix_private { + gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; + uint32_t rel_fdcount; ++ void *pxl; + }; + + typedef struct { +-- +1.8.3.1 + diff --git a/SOURCES/0507-inode-make-critical-section-smaller.patch b/SOURCES/0507-inode-make-critical-section-smaller.patch new file mode 100644 index 0000000..3b1dac5 --- /dev/null +++ b/SOURCES/0507-inode-make-critical-section-smaller.patch @@ -0,0 +1,764 @@ +From b3a17b67a69142eef1b4adde3409d5e54dda1e0b Mon Sep 17 00:00:00 2001 +From: Amar Tumballi <amarts@redhat.com> +Date: Sat, 9 Feb 2019 13:23:06 +0530 +Subject: [PATCH 507/511] inode: make critical section smaller + +do all the 'static' tasks outside of locked region. + +* hash_dentry() and hash_gfid() are now called outside locked region. +* remove extra __dentry_hash exported in libglusterfs.sym +* avoid checks in locked functions, if the check is done in calling + function. +* implement dentry_destroy(), which handles freeing of dentry separately, + from that of dentry_unset (which takes care of separating dentry from + inode, and table) + +> Updates: bz#1670031 +> Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0 +> Signed-off-by: Amar Tumballi <amarts@redhat.com> +> (Cherry pick from commit 8a90d346b9d3f69ff11241feb0011c90a8e57e30) +> (Review on upstream link https://review.gluster.org/#/c/glusterfs/+/22184/) + +Change-Id: I584213e0748464bb427fbdef3c4ab6615d7d5eb0 +BUG: 1898777 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221189 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/inode.h | 3 - + libglusterfs/src/inode.c | 323 +++++++++++++------------------------ + libglusterfs/src/libglusterfs.sym | 1 - + 3 files changed, 111 insertions(+), 216 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h +index 4421c47..c875653 100644 +--- a/libglusterfs/src/glusterfs/inode.h ++++ b/libglusterfs/src/glusterfs/inode.h +@@ -167,9 +167,6 @@ inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname, + inode_t *newdir, const char *newname, inode_t *inode, + struct iatt *stbuf); + +-dentry_t * +-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name); +- + inode_t * + inode_grep(inode_table_t *table, inode_t *parent, const char *name); + +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 4c3c546..71b2d2a 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -159,27 +159,15 @@ hash_dentry(inode_t *parent, const char *name, int mod) + static int + hash_gfid(uuid_t uuid, int mod) + { +- int ret = 0; +- +- ret = uuid[15] + (uuid[14] << 8); +- +- return ret; ++ return ((uuid[15] + (uuid[14] << 8)) % mod); + } + + static void +-__dentry_hash(dentry_t *dentry) ++__dentry_hash(dentry_t *dentry, const int hash) + { + inode_table_t *table = NULL; +- int hash = 0; +- +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return; +- } + + table = dentry->inode->table; +- hash = hash_dentry(dentry->parent, dentry->name, table->hashsize); + + list_del_init(&dentry->hash); + list_add(&dentry->hash, &table->name_hash[hash]); +@@ -188,49 +176,44 @@ __dentry_hash(dentry_t *dentry) + static int + __is_dentry_hashed(dentry_t *dentry) + { +- if 
(!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return 0; +- } +- + return !list_empty(&dentry->hash); + } + + static void + __dentry_unhash(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); +- return; +- } +- + list_del_init(&dentry->hash); + } + + static void +-__dentry_unset(dentry_t *dentry) ++dentry_destroy(dentry_t *dentry) + { +- if (!dentry) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, +- "dentry not found"); ++ if (!dentry) + return; +- } ++ ++ GF_FREE(dentry->name); ++ dentry->name = NULL; ++ mem_put(dentry); ++ ++ return; ++} ++ ++static dentry_t * ++__dentry_unset(dentry_t *dentry) ++{ ++ if (!dentry) ++ return NULL; + + __dentry_unhash(dentry); + + list_del_init(&dentry->inode_list); + +- GF_FREE(dentry->name); +- dentry->name = NULL; +- + if (dentry->parent) { + __inode_unref(dentry->parent, false); + dentry->parent = NULL; + } + +- mem_put(dentry); ++ return dentry; + } + + static int +@@ -289,22 +272,14 @@ static int + __is_dentry_cyclic(dentry_t *dentry) + { + int ret = 0; +- inode_t *inode = NULL; +- char *name = "<nul>"; + + ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode); + if (ret) { +- inode = dentry->inode; +- +- if (dentry->name) +- name = dentry->name; +- + gf_msg(dentry->inode->table->name, GF_LOG_CRITICAL, 0, + LG_MSG_DENTRY_CYCLIC_LOOP, +- "detected cyclic loop " +- "formation during inode linkage. inode (%s) linking " +- "under itself as %s", +- uuid_utoa(inode->gfid), name); ++ "detected cyclic loop formation during inode linkage. " ++ "inode (%s) linking under itself as %s", ++ uuid_utoa(dentry->inode->gfid), dentry->name); + } + + return ret; +@@ -313,41 +288,19 @@ __is_dentry_cyclic(dentry_t *dentry) + static void + __inode_unhash(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_del_init(&inode->hash); + } + + static int + __is_inode_hashed(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return 0; +- } +- + return !list_empty(&inode->hash); + } + + static void +-__inode_hash(inode_t *inode) ++__inode_hash(inode_t *inode, const int hash) + { +- inode_table_t *table = NULL; +- int hash = 0; +- +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- +- table = inode->table; +- hash = hash_gfid(inode->gfid, 65536); ++ inode_table_t *table = inode->table; + + list_del_init(&inode->hash); + list_add(&inode->hash, &table->inode_hash[hash]); +@@ -359,12 +312,6 @@ __dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name) + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + +- if (!inode || !name) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "inode || name not found"); +- return NULL; +- } +- + /* earlier, just the ino was sent, which could have been 0, now + we deal with gfid, and if sent gfid is null or 0, no need to + continue with the check */ +@@ -390,12 +337,6 @@ __inode_ctx_free(inode_t *inode) + xlator_t *xl = NULL; + xlator_t *old_THIS = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + if (!inode->_ctx) { + gf_msg(THIS->name, 
GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, + "_ctx not found"); +@@ -424,12 +365,6 @@ noctx: + static void + __inode_destroy(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + __inode_ctx_free(inode); + + LOCK_DESTROY(&inode->lock); +@@ -472,9 +407,6 @@ inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode) + static void + __inode_activate(inode_t *inode) + { +- if (!inode) +- return; +- + list_move(&inode->list, &inode->table->active); + inode->table->active_size++; + } +@@ -485,19 +417,13 @@ __inode_passivate(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_move_tail(&inode->list, &inode->table->lru); + inode->table->lru_size++; + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { + if (!__is_dentry_hashed(dentry)) +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + } + } + +@@ -507,12 +433,6 @@ __inode_retire(inode_t *inode) + dentry_t *dentry = NULL; + dentry_t *t = NULL; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return; +- } +- + list_move_tail(&inode->list, &inode->table->purge); + inode->table->purge_size++; + +@@ -520,7 +440,7 @@ __inode_retire(inode_t *inode) + + list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list) + { +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + } + } + +@@ -547,9 +467,6 @@ __inode_unref(inode_t *inode, bool clear) + xlator_t *this = NULL; + uint64_t nlookup = 0; + +- if (!inode) +- return NULL; +- + /* + * Root inode should always be in active list of inode table. So unrefs + * on root inode are no-ops. 
+@@ -677,16 +594,10 @@ inode_ref(inode_t *inode) + } + + static dentry_t * +-__dentry_create(inode_t *inode, inode_t *parent, const char *name) ++dentry_create(inode_t *inode, inode_t *parent, const char *name) + { + dentry_t *newd = NULL; + +- if (!inode || !parent || !name) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, +- "inode || parent || name not found"); +- return NULL; +- } +- + newd = mem_get0(parent->table->dentry_pool); + if (newd == NULL) { + goto out; +@@ -702,10 +613,6 @@ __dentry_create(inode_t *inode, inode_t *parent, const char *name) + goto out; + } + +- if (parent) +- newd->parent = __inode_ref(parent, false); +- +- list_add(&newd->inode_list, &inode->dentry_list); + newd->inode = inode; + + out: +@@ -717,14 +624,6 @@ __inode_create(inode_table_t *table) + { + inode_t *newi = NULL; + +- if (!table) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, +- LG_MSG_INODE_TABLE_NOT_FOUND, +- "table not " +- "found"); +- return NULL; +- } +- + newi = mem_get0(table->inode_pool); + if (!newi) { + goto out; +@@ -795,9 +694,6 @@ __inode_ref_reduce_by_n(inode_t *inode, uint64_t nref) + { + uint64_t nlookup = 0; + +- if (!inode) +- return NULL; +- + GF_ASSERT(inode->ref >= nref); + + inode->ref -= nref; +@@ -837,17 +733,12 @@ inode_forget_atomic(inode_t *inode, uint64_t nlookup) + } + + dentry_t * +-__dentry_grep(inode_table_t *table, inode_t *parent, const char *name) ++__dentry_grep(inode_table_t *table, inode_t *parent, const char *name, ++ const int hash) + { +- int hash = 0; + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + +- if (!table || !name || !parent) +- return NULL; +- +- hash = hash_dentry(parent, name, table->hashsize); +- + list_for_each_entry(tmp, &table->name_hash[hash], hash) + { + if (tmp->parent == parent && !strcmp(tmp->name, name)) { +@@ -872,15 +763,16 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) + return NULL; + } + ++ int hash = hash_dentry(parent, name, table->hashsize); ++ + pthread_mutex_lock(&table->lock); + { +- dentry = __dentry_grep(table, parent, name); +- +- if (dentry) ++ dentry = __dentry_grep(table, parent, name, hash); ++ if (dentry) { + inode = dentry->inode; +- +- if (inode) +- __inode_ref(inode, false); ++ if (inode) ++ __inode_ref(inode, false); ++ } + } + pthread_mutex_unlock(&table->lock); + +@@ -947,17 +839,18 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, + return ret; + } + ++ int hash = hash_dentry(parent, name, table->hashsize); ++ + pthread_mutex_lock(&table->lock); + { +- dentry = __dentry_grep(table, parent, name); +- +- if (dentry) ++ dentry = __dentry_grep(table, parent, name, hash); ++ if (dentry) { + inode = dentry->inode; +- +- if (inode) { +- gf_uuid_copy(gfid, inode->gfid); +- *type = inode->ia_type; +- ret = 0; ++ if (inode) { ++ gf_uuid_copy(gfid, inode->gfid); ++ *type = inode->ia_type; ++ ret = 0; ++ } + } + } + pthread_mutex_unlock(&table->lock); +@@ -978,25 +871,14 @@ __is_root_gfid(uuid_t gfid) + } + + inode_t * +-__inode_find(inode_table_t *table, uuid_t gfid) ++__inode_find(inode_table_t *table, uuid_t gfid, const int hash) + { + inode_t *inode = NULL; + inode_t *tmp = NULL; +- int hash = 0; +- +- if (!table) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, +- LG_MSG_INODE_TABLE_NOT_FOUND, +- "table not " +- "found"); +- goto out; +- } + + if (__is_root_gfid(gfid)) + return table->root; + +- hash = hash_gfid(gfid, 65536); +- + list_for_each_entry(tmp, &table->inode_hash[hash], hash) + { + if (gf_uuid_compare(tmp->gfid, 
gfid) == 0) { +@@ -1005,7 +887,6 @@ __inode_find(inode_table_t *table, uuid_t gfid) + } + } + +-out: + return inode; + } + +@@ -1022,9 +903,11 @@ inode_find(inode_table_t *table, uuid_t gfid) + return NULL; + } + ++ int hash = hash_gfid(gfid, 65536); ++ + pthread_mutex_lock(&table->lock); + { +- inode = __inode_find(table, gfid); ++ inode = __inode_find(table, gfid, hash); + if (inode) + __inode_ref(inode, false); + } +@@ -1035,7 +918,7 @@ inode_find(inode_table_t *table, uuid_t gfid) + + static inode_t * + __inode_link(inode_t *inode, inode_t *parent, const char *name, +- struct iatt *iatt) ++ struct iatt *iatt, const int dhash) + { + dentry_t *dentry = NULL; + dentry_t *old_dentry = NULL; +@@ -1043,16 +926,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + inode_table_t *table = NULL; + inode_t *link_inode = NULL; + +- if (!inode) { +- errno = EINVAL; +- return NULL; +- } +- + table = inode->table; +- if (!table) { +- errno = EINVAL; +- return NULL; +- } + + if (parent) { + /* We should prevent inode linking between different +@@ -1090,14 +964,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + return NULL; + } + +- old_inode = __inode_find(table, iatt->ia_gfid); ++ int ihash = hash_gfid(iatt->ia_gfid, 65536); ++ ++ old_inode = __inode_find(table, iatt->ia_gfid, ihash); + + if (old_inode) { + link_inode = old_inode; + } else { + gf_uuid_copy(inode->gfid, iatt->ia_gfid); + inode->ia_type = iatt->ia_type; +- __inode_hash(inode); ++ __inode_hash(inode, ihash); + } + } else { + /* @old_inode serves another important purpose - it indicates +@@ -1112,22 +988,16 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + old_inode = inode; + } + +- if (name) { +- if (!strcmp(name, ".") || !strcmp(name, "..")) +- return link_inode; +- +- if (strchr(name, '/')) { +- GF_ASSERT(!"inode link attempted with '/' in name"); +- return NULL; +- } ++ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) { ++ return link_inode; + } + + /* use only link_inode beyond this point */ + if (parent) { +- old_dentry = __dentry_grep(table, parent, name); ++ old_dentry = __dentry_grep(table, parent, name, dhash); + + if (!old_dentry || old_dentry->inode != link_inode) { +- dentry = __dentry_create(link_inode, parent, name); ++ dentry = dentry_create(link_inode, parent, name); + if (!dentry) { + gf_msg_callingfn( + THIS->name, GF_LOG_ERROR, 0, LG_MSG_DENTRY_CREATE_FAILED, +@@ -1137,15 +1007,20 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + errno = ENOMEM; + return NULL; + } ++ ++ /* dentry linking needs to happen inside lock */ ++ dentry->parent = __inode_ref(parent, false); ++ list_add(&dentry->inode_list, &link_inode->dentry_list); ++ + if (old_inode && __is_dentry_cyclic(dentry)) { + errno = ELOOP; +- __dentry_unset(dentry); ++ dentry_destroy(__dentry_unset(dentry)); + return NULL; + } +- __dentry_hash(dentry); ++ __dentry_hash(dentry, dhash); + + if (old_dentry) +- __dentry_unset(old_dentry); ++ dentry_destroy(__dentry_unset(old_dentry)); + } + } + +@@ -1155,6 +1030,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + inode_t * + inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + { ++ int hash = 0; + inode_table_t *table = NULL; + inode_t *linked_inode = NULL; + +@@ -1166,10 +1042,18 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + + table = inode->table; + ++ if (parent && name) { ++ hash = hash_dentry(parent, name, table->hashsize); ++ } ++ ++ if (name 
&& strchr(name, '/')) { ++ GF_ASSERT(!"inode link attempted with '/' in name"); ++ return NULL; ++ } ++ + pthread_mutex_lock(&table->lock); + { +- linked_inode = __inode_link(inode, parent, name, iatt); +- ++ linked_inode = __inode_link(inode, parent, name, iatt, hash); + if (linked_inode) + __inode_ref(linked_inode, false); + } +@@ -1312,48 +1196,47 @@ inode_invalidate(inode_t *inode) + return ret; + } + +-static void ++static dentry_t * + __inode_unlink(inode_t *inode, inode_t *parent, const char *name) + { + dentry_t *dentry = NULL; + char pgfid[64] = {0}; + char gfid[64] = {0}; + +- if (!inode || !parent || !name) +- return; +- + dentry = __dentry_search_for_inode(inode, parent->gfid, name); + + /* dentry NULL for corrupted backend */ + if (dentry) { +- __dentry_unset(dentry); ++ dentry = __dentry_unset(dentry); + } else { + gf_msg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND, + "%s/%s: dentry not found in %s", + uuid_utoa_r(parent->gfid, pgfid), name, + uuid_utoa_r(inode->gfid, gfid)); + } ++ ++ return dentry; + } + + void + inode_unlink(inode_t *inode, inode_t *parent, const char *name) + { +- inode_table_t *table = NULL; ++ inode_table_t *table; ++ dentry_t *dentry; + +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); ++ if (!inode || !parent || !name) + return; +- } + + table = inode->table; + + pthread_mutex_lock(&table->lock); + { +- __inode_unlink(inode, parent, name); ++ dentry = __inode_unlink(inode, parent, name); + } + pthread_mutex_unlock(&table->lock); + ++ dentry_destroy(dentry); ++ + inode_table_prune(table); + } + +@@ -1362,6 +1245,9 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + inode_t *dstdir, const char *dstname, inode_t *inode, + struct iatt *iatt) + { ++ int hash = 0; ++ dentry_t *dentry = NULL; ++ + if (!inode) { + gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, + "inode not found"); +@@ -1370,13 +1256,26 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + + table = inode->table; + ++ if (dstname && strchr(dstname, '/')) { ++ GF_ASSERT(!"inode link attempted with '/' in name"); ++ return -1; ++ } ++ ++ if (dstdir && dstname) { ++ hash = hash_dentry(dstdir, dstname, table->hashsize); ++ } ++ + pthread_mutex_lock(&table->lock); + { +- __inode_link(inode, dstdir, dstname, iatt); +- __inode_unlink(inode, srcdir, srcname); ++ __inode_link(inode, dstdir, dstname, iatt, hash); ++ /* pick the old dentry */ ++ dentry = __inode_unlink(inode, srcdir, srcname); + } + pthread_mutex_unlock(&table->lock); + ++ /* free the old dentry */ ++ dentry_destroy(dentry); ++ + inode_table_prune(table); + + return 0; +@@ -1447,12 +1346,6 @@ inode_parent(inode_t *inode, uuid_t pargfid, const char *name) + static int + __inode_has_dentry(inode_t *inode) + { +- if (!inode) { +- gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, +- "inode not found"); +- return 0; +- } +- + return !list_empty(&inode->dentry_list); + } + +@@ -1461,6 +1354,12 @@ inode_has_dentry(inode_t *inode) + { + int dentry_present = 0; + ++ if (!inode) { ++ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND, ++ "inode not found"); ++ return 0; ++ } ++ + LOCK(&inode->lock); + { + dentry_present = __inode_has_dentry(inode); +@@ -1720,7 +1619,7 @@ __inode_table_init_root(inode_table_t *table) + iatt.ia_ino = 1; + iatt.ia_type = IA_IFDIR; + +- __inode_link(root, NULL, NULL, &iatt); ++ __inode_link(root, NULL, NULL, &iatt, 0); + 
table->root = root; + } + +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index 5a721e0..d060292 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -357,7 +357,6 @@ default_copy_file_range + default_copy_file_range_cbk + default_copy_file_range_failure_cbk + default_copy_file_range_resume +-__dentry_grep + dht_is_linkfile + dict_add + dict_addn +-- +1.8.3.1 + diff --git a/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch b/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch new file mode 100644 index 0000000..9ccc1b5 --- /dev/null +++ b/SOURCES/0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch @@ -0,0 +1,232 @@ +From 87b7689f7727a542c5afa22bdebd3781dd650a2f Mon Sep 17 00:00:00 2001 +From: Csaba Henk <csaba@redhat.com> +Date: Fri, 17 Jul 2020 11:33:36 +0200 +Subject: [PATCH 508/511] fuse: fetch arbitrary number of groups from + /proc/[pid]/status + +Glusterfs so far constrained itself with an arbitrary limit (32) +for the number of groups read from /proc/[pid]/status (this was +the number of groups shown there prior to Linux commit +v3.7-9553-g8d238027b87e (v3.8-rc1~74^2~59); since this commit, all +groups are shown). + +With this change we'll read groups up to the number Glusterfs +supports in general (64k). + +Note: the actual number of groups that are made use of in a +regular Glusterfs setup shall still be capped at ~93 due to limitations +of the RPC transport. To be able to handle more groups than that, +brick side gid resolution (server.manage-gids option) can be used along +with NIS, LDAP or other such networked directory service (see +https://github.com/gluster/glusterdocs/blob/5ba15a2/docs/Administrator%20Guide/Handling-of-users-with-many-groups.md#limit-in-the-glusterfs-protocol +). + +Also adding some diagnostic messages to frame_fill_groups(). 
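+
+For illustration, a minimal standalone sketch of the approach described
+above (read_groups is a hypothetical helper, not part of this change;
+assumes a Linux procfs): locate the "Groups:" line in /proc/[pid]/status
+and parse the whitespace-separated numeric ids.
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+    #include <sys/types.h>
+
+    /* Returns the number of gids read into gids[], or -1 on error. */
+    static int read_groups(pid_t pid, gid_t *gids, int max)
+    {
+        char path[32], line[4096];
+        char *tok, *save = NULL;
+        int n = 0;
+
+        snprintf(path, sizeof(path), "/proc/%d/status", (int)pid);
+        FILE *fp = fopen(path, "r");
+        if (!fp)
+            return -1;
+        while (fgets(line, sizeof(line), fp)) {
+            if (strncmp(line, "Groups:", 7) != 0)
+                continue;
+            /* ids follow the prefix, separated by spaces/tabs */
+            for (tok = strtok_r(line + 7, " \t\r\n", &save);
+                 tok && n < max; tok = strtok_r(NULL, " \t\r\n", &save))
+                gids[n++] = (gid_t)strtol(tok, NULL, 0);
+            break;
+        }
+        fclose(fp);
+        return n;
+    }
+
+The actual change additionally rewinds and re-reads the file with a larger
+gid buffer when more ids are present than the current capacity allows,
+capped at GF_MAX_AUX_GROUPS.
+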
+ +Upstream: +> Reviewed-on: https://review.gluster.org/c/glusterfs/+/24721 +> Change-Id: I271f3dc3e6d3c44d6d989c7a2073ea5f16c26ee0 +> fixes: #1075 +> Signed-off-by: Csaba Henk <csaba@redhat.com> + +BUG: 1749304 +Change-Id: I80bf99d34087fb95768bf2259d8c4774d9f5d0c5 +Signed-off-by: Csaba Henk <csaba@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220920 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/glusterfs/stack.h | 7 ++++ + tests/bugs/fuse/many-groups-for-acl.t | 13 ++++++- + xlators/mount/fuse/src/fuse-helpers.c | 71 +++++++++++++++++++++++------------ + 3 files changed, 65 insertions(+), 26 deletions(-) + +diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h +index 1758550..bd466d8 100644 +--- a/libglusterfs/src/glusterfs/stack.h ++++ b/libglusterfs/src/glusterfs/stack.h +@@ -429,6 +429,7 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) + if (ngrps <= SMALL_GROUP_COUNT) { + stack->groups = stack->groups_small; + } else { ++ GF_FREE(stack->groups_large); + stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t), + gf_common_mt_groups_t); + if (!stack->groups_large) +@@ -442,6 +443,12 @@ call_stack_alloc_groups(call_stack_t *stack, int ngrps) + } + + static inline int ++call_stack_groups_capacity(call_stack_t *stack) ++{ ++ return max(stack->ngrps, SMALL_GROUP_COUNT); ++} ++ ++static inline int + call_frames_count(call_stack_t *call_stack) + { + call_frame_t *pos; +diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t +index d959f75..a51b1bc 100755 +--- a/tests/bugs/fuse/many-groups-for-acl.t ++++ b/tests/bugs/fuse/many-groups-for-acl.t +@@ -38,6 +38,13 @@ do + done + TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} + ++# Linux < 3.8 exports only first 32 gids of pid to userspace ++kernel_exports_few_gids=0 ++if [ "$OSTYPE" = Linux ] && \ ++ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then ++ kernel_exports_few_gids=1 ++fi ++ + # preparation done, start the tests + + TEST glusterd +@@ -48,6 +55,8 @@ TEST $CLI volume set $V0 nfs.disable off + TEST $CLI volume set ${V0} server.manage-gids off + TEST $CLI volume start ${V0} + ++# This is just a synchronization hack to make sure the bricks are ++# up before going on. + EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available + + # mount the volume with POSIX ACL support, without --resolve-gids +@@ -69,8 +78,8 @@ TEST [ $? -eq 0 ] + su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null" + TEST [ $? -eq 0 ] + +-su -m ${NEW_USER} -c "touch ${M0}/gid-64/failure > /dev/null" +-TEST [ $? -ne 0 ] ++su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null" ++TEST [ $? -eq $kernel_exports_few_gids ] + + su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null" + TEST [ $? 
-ne 0 ] +diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c +index 5bfc40c..6e04cd4 100644 +--- a/xlators/mount/fuse/src/fuse-helpers.c ++++ b/xlators/mount/fuse/src/fuse-helpers.c +@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh) + return state; + } + +-#define FUSE_MAX_AUX_GROUPS \ +- 32 /* We can get only up to 32 aux groups from /proc */ + void + frame_fill_groups(call_frame_t *frame) + { +@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame) + char filename[32]; + char line[4096]; + char *ptr = NULL; +- FILE *fp = NULL; +- int idx = 0; + long int id = 0; + char *saveptr = NULL; + char *endptr = NULL; +@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame) + + call_stack_set_groups(frame->root, ngroups, &mygroups); + } else { ++ FILE *fp = NULL; ++ + ret = snprintf(filename, sizeof filename, "/proc/%d/status", + frame->root->pid); +- if (ret >= sizeof filename) ++ if (ret >= sizeof filename) { ++ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size"); + goto out; ++ } + + fp = fopen(filename, "r"); +- if (!fp) ++ if (!fp) { ++ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename, ++ strerror(errno)); + goto out; ++ } + +- if (call_stack_alloc_groups(frame->root, ngroups) != 0) +- goto out; ++ for (;;) { ++ gf_boolean_t found_groups = _gf_false; ++ int idx = 0; + +- while ((ptr = fgets(line, sizeof line, fp))) { +- if (strncmp(ptr, "Groups:", 7) != 0) +- continue; ++ if (call_stack_alloc_groups(frame->root, ngroups) != 0) { ++ gf_log(this->name, GF_LOG_ERROR, ++ "failed to allocate gid buffer"); ++ goto out; ++ } + ++ while ((ptr = fgets(line, sizeof line, fp))) { ++ if (strncmp(ptr, "Groups:", 7) == 0) { ++ found_groups = _gf_true; ++ break; ++ } ++ } ++ if (!found_groups) { ++ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s", ++ filename); ++ break; ++ } + ptr = line + 8; + + for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr; + ptr = strtok_r(NULL, " \t\r\n", &saveptr)) { + errno = 0; + id = strtol(ptr, &endptr, 0); +- if (errno == ERANGE) +- break; +- if (!endptr || *endptr) ++ if (errno == ERANGE || !endptr || *endptr) { ++ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s", ++ filename); + break; +- frame->root->groups[idx++] = id; +- if (idx == FUSE_MAX_AUX_GROUPS) ++ } ++ if (idx < call_stack_groups_capacity(frame->root)) ++ frame->root->groups[idx] = id; ++ idx++; ++ if (idx == GF_MAX_AUX_GROUPS) + break; + } +- +- frame->root->ngrps = idx; +- break; ++ if (idx > call_stack_groups_capacity(frame->root)) { ++ ngroups = idx; ++ rewind(fp); ++ } else { ++ frame->root->ngrps = idx; ++ break; ++ } + } ++ out: ++ if (fp) ++ fclose(fp); + } +- +-out: +- if (fp) +- fclose(fp); + #elif defined(GF_SOLARIS_HOST_OS) + char filename[32]; + char scratch[128]; +@@ -245,7 +268,7 @@ out: + fp = fopen(filename, "r"); + if (fp != NULL) { + if (fgets(scratch, sizeof scratch, fp) != NULL) { +- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS); ++ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS); + if (call_stack_alloc_groups(frame->root, ngrps) != 0) { + fclose(fp); + return; +-- +1.8.3.1 + diff --git a/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch b/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch new file mode 100644 index 0000000..fdfc9bb --- /dev/null +++ b/SOURCES/0509-core-configure-optimum-inode-table-hash_size-for-shd.patch @@ -0,0 +1,407 @@ +From a18f03cbf2b5652f8617cb4dd236bb4ca9838d96 Mon Sep 17 00:00:00 2001 +From: 
Mohit Agrawal <moagrawa@redhat.com>
+Date: Tue, 6 Oct 2020 16:54:15 +0530
+Subject: [PATCH 509/511] core: configure optimum inode table hash_size for shd
+
+In a brick_mux environment a shd process consumes high memory.
+After printing the statedump, I found that it allocates 1M per afr xlator
+for all bricks. In case 4k volumes are configured, it consumes almost
+6G RSS in total, of which 4G is consumed by inode tables.
+
+[cluster/replicate.test1-replicate-0 - usage-type gf_common_mt_list_head memusage]
+size=1273488
+num_allocs=2
+max_size=1273488
+max_num_allocs=2
+total_allocs=2
+
+The inode_new_table function allocates memory (1M) for a list of inode and
+dentry hash buckets. For shd the lru_limit size is 1, so we don't need to
+create a big hash table; to reduce the RSS size of the shd process, pass an
+optimum bucket count at the time of creating the inode_table.
+
+> Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
+> Fixes: #1538
+> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+> (Cherry pick from commit ca6bbc486e76fdb9a8e07119bb10d7fa45b2e93b)
+> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1538)
+
+Change-Id: I039716d42321a232fdee1ee8fd50295e638715bb
+BUG: 1898777
+Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/221191
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-master.c | 2 +-
+ libglusterfs/src/glusterfs/inode.h | 17 +++++----
+ libglusterfs/src/inode.c | 53 +++++++++++++++++---------
+ xlators/cluster/afr/src/afr.c | 10 ++++-
+ xlators/cluster/dht/src/dht-rebalance.c | 3 +-
+ xlators/cluster/ec/src/ec.c | 2 +-
+ xlators/features/bit-rot/src/bitd/bit-rot.c | 2 +-
+ xlators/features/quota/src/quotad-helpers.c | 2 +-
+ xlators/features/trash/src/trash.c | 4 +-
+ xlators/mount/fuse/src/fuse-bridge.c | 6 +--
+ xlators/nfs/server/src/nfs.c | 2 +-
+ xlators/protocol/server/src/server-handshake.c | 3 +-
+ 12 files changed, 66 insertions(+), 40 deletions(-)
+
+diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
+index b4473b1..9e604d3 100644
+--- a/api/src/glfs-master.c
++++ b/api/src/glfs-master.c
+@@ -45,7 +45,7 @@ graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
+ }
+
+ if (!new_subvol->itable) {
+- itable = inode_table_new(131072, new_subvol);
++ itable = inode_table_new(131072, new_subvol, 0, 0);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
+index c875653..62c093d 100644
+--- a/libglusterfs/src/glusterfs/inode.h
++++ b/libglusterfs/src/glusterfs/inode.h
+@@ -35,11 +35,12 @@ typedef struct _dentry dentry_t;
+
+ struct _inode_table {
+ pthread_mutex_t lock;
+- size_t hashsize; /* bucket size of inode hash and dentry hash */
+- char *name; /* name of the inode table, just for gf_log() */
+- inode_t *root; /* root directory inode, with number 1 */
+- xlator_t *xl; /* xlator to be called to do purge */
+- uint32_t lru_limit; /* maximum LRU cache size */
++ size_t dentry_hashsize; /* Number of buckets for dentry hash*/
++ size_t inode_hashsize; /* Size of inode hash table */
++ char *name; /* name of the inode table, just for gf_log() */
++ inode_t *root; /* root directory inode, with number 1 */
++ xlator_t *xl; /* xlator to be called to do purge */
++ uint32_t lru_limit; /* maximum LRU cache size */
+ struct list_head *inode_hash; /* buckets for inode hash table */
+ struct list_head *name_hash; /* buckets for dentry hash table */
+ struct 
list_head active; /* list of inodes currently active (in an fop) */ +@@ -116,12 +117,14 @@ struct _inode { + #define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1) + + inode_table_t * +-inode_table_new(uint32_t lru_limit, xlator_t *xl); ++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dhash_size, ++ uint32_t inodehash_size); + + inode_table_t * + inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), +- xlator_t *invalidator_xl); ++ xlator_t *invalidator_xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize); + + void + inode_table_destroy_all(glusterfs_ctx_t *ctx); +diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c +index 71b2d2a..98f8ea6 100644 +--- a/libglusterfs/src/inode.c ++++ b/libglusterfs/src/inode.c +@@ -763,7 +763,7 @@ inode_grep(inode_table_t *table, inode_t *parent, const char *name) + return NULL; + } + +- int hash = hash_dentry(parent, name, table->hashsize); ++ int hash = hash_dentry(parent, name, table->dentry_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -839,7 +839,7 @@ inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name, + return ret; + } + +- int hash = hash_dentry(parent, name, table->hashsize); ++ int hash = hash_dentry(parent, name, table->dentry_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -903,7 +903,7 @@ inode_find(inode_table_t *table, uuid_t gfid) + return NULL; + } + +- int hash = hash_gfid(gfid, 65536); ++ int hash = hash_gfid(gfid, table->inode_hashsize); + + pthread_mutex_lock(&table->lock); + { +@@ -964,7 +964,7 @@ __inode_link(inode_t *inode, inode_t *parent, const char *name, + return NULL; + } + +- int ihash = hash_gfid(iatt->ia_gfid, 65536); ++ int ihash = hash_gfid(iatt->ia_gfid, table->inode_hashsize); + + old_inode = __inode_find(table, iatt->ia_gfid, ihash); + +@@ -1043,7 +1043,7 @@ inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt) + table = inode->table; + + if (parent && name) { +- hash = hash_dentry(parent, name, table->hashsize); ++ hash = hash_dentry(parent, name, table->dentry_hashsize); + } + + if (name && strchr(name, '/')) { +@@ -1262,7 +1262,7 @@ inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname, + } + + if (dstdir && dstname) { +- hash = hash_dentry(dstdir, dstname, table->hashsize); ++ hash = hash_dentry(dstdir, dstname, table->dentry_hashsize); + } + + pthread_mutex_lock(&table->lock); +@@ -1626,7 +1626,8 @@ __inode_table_init_root(inode_table_t *table) + inode_table_t * + inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + int32_t (*invalidator_fn)(xlator_t *, inode_t *), +- xlator_t *invalidator_xl) ++ xlator_t *invalidator_xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize) + { + inode_table_t *new = NULL; + uint32_t mem_pool_size = lru_limit; +@@ -1644,7 +1645,19 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + new->invalidator_fn = invalidator_fn; + new->invalidator_xl = invalidator_xl; + +- new->hashsize = 14057; /* TODO: Random Number?? */ ++ if (dentry_hashsize == 0) { ++ /* Prime number for uniform distribution */ ++ new->dentry_hashsize = 14057; ++ } else { ++ new->dentry_hashsize = dentry_hashsize; ++ } ++ ++ if (inode_hashsize == 0) { ++ /* The size of hash table always should be power of 2 */ ++ new->inode_hashsize = 65536; ++ } else { ++ new->inode_hashsize = inode_hashsize; ++ } + + /* In case FUSE is initing the inode table. 
*/ + if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES)) +@@ -1658,13 +1671,13 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + if (!new->dentry_pool) + goto out; + +- new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head), +- gf_common_mt_list_head); ++ new->inode_hash = (void *)GF_CALLOC( ++ new->inode_hashsize, sizeof(struct list_head), gf_common_mt_list_head); + if (!new->inode_hash) + goto out; + +- new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head), +- gf_common_mt_list_head); ++ new->name_hash = (void *)GF_CALLOC( ++ new->dentry_hashsize, sizeof(struct list_head), gf_common_mt_list_head); + if (!new->name_hash) + goto out; + +@@ -1675,11 +1688,11 @@ inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl, + if (!new->fd_mem_pool) + goto out; + +- for (i = 0; i < 65536; i++) { ++ for (i = 0; i < new->inode_hashsize; i++) { + INIT_LIST_HEAD(&new->inode_hash[i]); + } + +- for (i = 0; i < new->hashsize; i++) { ++ for (i = 0; i < new->dentry_hashsize; i++) { + INIT_LIST_HEAD(&new->name_hash[i]); + } + +@@ -1717,10 +1730,12 @@ out: + } + + inode_table_t * +-inode_table_new(uint32_t lru_limit, xlator_t *xl) ++inode_table_new(uint32_t lru_limit, xlator_t *xl, uint32_t dentry_hashsize, ++ uint32_t inode_hashsize) + { + /* Only fuse for now requires the inode table with invalidator */ +- return inode_table_with_invalidator(lru_limit, xl, NULL, NULL); ++ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL, ++ dentry_hashsize, inode_hashsize); + } + + int +@@ -2439,8 +2454,10 @@ inode_table_dump(inode_table_t *itable, char *prefix) + return; + } + +- gf_proc_dump_build_key(key, prefix, "hashsize"); +- gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize); ++ gf_proc_dump_build_key(key, prefix, "dentry_hashsize"); ++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->dentry_hashsize); ++ gf_proc_dump_build_key(key, prefix, "inode_hashsize"); ++ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->inode_hashsize); + gf_proc_dump_build_key(key, prefix, "name"); + gf_proc_dump_write(key, "%s", itable->name); + +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index 8f9e71f..bfa464f 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -594,7 +594,15 @@ init(xlator_t *this) + goto out; + } + +- this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this); ++ if (priv->shd.iamshd) { ++ /* Number of hash bucket should be prime number so declare 131 ++ total dentry hash buckets ++ */ ++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 131, 128); ++ } else { ++ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this, 0, 0); ++ } ++ + if (!this->itable) { + ret = -ENOMEM; + goto out; +diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c +index 16ac16c..072896d 100644 +--- a/xlators/cluster/dht/src/dht-rebalance.c ++++ b/xlators/cluster/dht/src/dht-rebalance.c +@@ -1168,7 +1168,6 @@ __dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag, + break; + } + +- + offset += ret; + total += ret; + +@@ -2467,7 +2466,7 @@ dht_build_root_inode(xlator_t *this, inode_t **inode) + 0, + }; + +- itable = inode_table_new(0, this); ++ itable = inode_table_new(0, this, 0, 0); + if (!itable) + return; + +diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c +index 3f31c74..4118c3b 100644 +--- a/xlators/cluster/ec/src/ec.c ++++ b/xlators/cluster/ec/src/ec.c +@@ -734,7 +734,7 @@ init(xlator_t *this) 
+ GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed); + GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed); + +- this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this); ++ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this, 0, 0); + if (!this->itable) + goto failed; + +diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c +index 424c0d5..4e0e798 100644 +--- a/xlators/features/bit-rot/src/bitd/bit-rot.c ++++ b/xlators/features/bit-rot/src/bitd/bit-rot.c +@@ -1658,7 +1658,7 @@ notify(xlator_t *this, int32_t event, void *data, ...) + child->child_up = 1; + child->xl = subvol; + if (!child->table) +- child->table = inode_table_new(4096, subvol); ++ child->table = inode_table_new(4096, subvol, 0, 0); + + _br_qchild_event(this, child, br_brick_connect); + pthread_cond_signal(&priv->cond); +diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c +index d9f0351..46ac116 100644 +--- a/xlators/features/quota/src/quotad-helpers.c ++++ b/xlators/features/quota/src/quotad-helpers.c +@@ -32,7 +32,7 @@ get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req) + UNLOCK(&priv->lock); + + if (active_subvol->itable == NULL) +- active_subvol->itable = inode_table_new(4096, active_subvol); ++ active_subvol->itable = inode_table_new(4096, active_subvol, 0, 0); + + state->itable = active_subvol->itable; + +diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c +index 93f020f..099c887 100644 +--- a/xlators/features/trash/src/trash.c ++++ b/xlators/features/trash/src/trash.c +@@ -2261,7 +2261,7 @@ reconfigure(xlator_t *this, dict_t *options) + + if (!active_earlier && active_now) { + if (!priv->trash_itable) { +- priv->trash_itable = inode_table_new(0, this); ++ priv->trash_itable = inode_table_new(0, this, 0, 0); + if (!priv->trash_itable) { + ret = -ENOMEM; + gf_log(this->name, GF_LOG_ERROR, +@@ -2533,7 +2533,7 @@ init(xlator_t *this) + } + + if (priv->state) { +- priv->trash_itable = inode_table_new(0, this); ++ priv->trash_itable = inode_table_new(0, this, 0, 0); + if (!priv->trash_itable) { + ret = -ENOMEM; + priv->state = _gf_false; +diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c +index 1bddac2..919eea3 100644 +--- a/xlators/mount/fuse/src/fuse-bridge.c ++++ b/xlators/mount/fuse/src/fuse-bridge.c +@@ -6298,10 +6298,10 @@ fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph) + } + + #if FUSE_KERNEL_MINOR_VERSION >= 11 +- itable = inode_table_with_invalidator(priv->lru_limit, graph->top, +- fuse_inode_invalidate_fn, this); ++ itable = inode_table_with_invalidator( ++ priv->lru_limit, graph->top, fuse_inode_invalidate_fn, this, 0, 0); + #else +- itable = inode_table_new(0, graph->top); ++ itable = inode_table_new(0, graph->top, 0, 0); + #endif + if (!itable) { + ret = -1; +diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c +index ebded41..402be30 100644 +--- a/xlators/nfs/server/src/nfs.c ++++ b/xlators/nfs/server/src/nfs.c +@@ -564,7 +564,7 @@ nfs_init_subvolume(struct nfs_state *nfs, xlator_t *xl) + return -1; + + lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT; +- xl->itable = inode_table_new(lrusize, xl); ++ xl->itable = inode_table_new(lrusize, xl, 0, 0); + if (!xl->itable) { + gf_msg(GF_NFS, GF_LOG_CRITICAL, ENOMEM, NFS_MSG_NO_MEMORY, + "Failed to allocate inode table"); +diff --git a/xlators/protocol/server/src/server-handshake.c 
b/xlators/protocol/server/src/server-handshake.c
+index 1d1177d..eeca73c 100644
+--- a/xlators/protocol/server/src/server-handshake.c
++++ b/xlators/protocol/server/src/server-handshake.c
+@@ -36,7 +36,6 @@ gf_compare_client_version(rpcsvc_request_t *req, int fop_prognum,
+ return ret;
+ }
+
+-
+ int
+ server_getspec(rpcsvc_request_t *req)
+ {
+@@ -629,7 +628,7 @@ server_setvolume(rpcsvc_request_t *req)
+
+ /* TODO: what is this ? */
+ client->bound_xl->itable = inode_table_new(conf->inode_lru_limit,
+- client->bound_xl);
++ client->bound_xl, 0, 0);
+ }
+ }
+ UNLOCK(&conf->itable_lock);
+--
+1.8.3.1
+
diff --git a/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch b/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
new file mode 100644
index 0000000..e8a4906
--- /dev/null
+++ b/SOURCES/0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch
@@ -0,0 +1,784 @@
+From 5294c82e0528059b10cbaab7805b20e76ffdd66b Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Mon, 30 Nov 2020 17:39:53 +0530
+Subject: [PATCH 510/511] glusterd[brick_mux]: Optimize friend handshake code
+ to avoid call_bail (#1614)
+
+During the glusterd handshake, glusterd receives a volume dictionary
+from the peer end to compare against its own volume dictionary data. If the
+options differ, it sets a key to record that volume options have changed
+and calls the import synctask to delete/start the volume. In a brick_mux
+environment, when the number of volumes is high (5k), the dict API in the
+function glusterd_compare_friend_volume takes time because the function
+glusterd_handle_friend_req saves all peer volume data in a single dictionary.
+Due to the time taken by glusterd_handle_friend_req, RPC requests receive
+a call_bail from the peer end and gluster (CLI) won't be able to show
+volume status.
+
+Solution: To optimize the code, the below changes were made
+1) Populate a new specific dictionary to save the peer end's version-specific
+ data, so that the function won't take much time to decide whether the
+ peer end has some volume updates.
+2) In case a volume has a differing version, set the key in status_arr instead
+ of saving it in a dictionary, which makes the operation faster (see the
+ sketch below). 
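+
+For illustration, a minimal standalone sketch of the status_arr bitmap idea
+(mark_changed/is_changed are hypothetical helpers, not names from this
+change; the patch itself toggles the bit with XOR inside
+glusterd_compare_friend_volume): one bit per volume on a uint64_t array
+turns "volume N changed" marking and testing into constant-time bit
+operations instead of dict lookups.
+
+    #include <stdint.h>
+
+    /* Caller sizes the array as (nvols / 64) + 1 elements, zero-initialized. */
+    static inline void mark_changed(uint64_t *bm, unsigned n)
+    {
+        bm[n / 64] |= (uint64_t)1 << (n % 64);
+    }
+
+    static inline int is_changed(const uint64_t *bm, unsigned n)
+    {
+        return (int)((bm[n / 64] >> (n % 64)) & 1);
+    }
+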
+ +Note: To validate the changes followed below procedure +1) Setup 5100 distributed volumes 3x1 +2) Enable brick_mux +3) Start all the volumes +4) Kill all gluster processes on 3rd node +5) Run a loop to update volume option on a 1st node + for i in {1..5100}; do gluster v set vol$i performance.open-behind off; done +6) Start the glusterd process on the 3rd node +7) Wait to finish handshake and check there should not be any call_bail message + in the logs + +> Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1 +> Fixes: #1613 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit 12545d91eed27ff9abb0505a12c7d4e75b45a53e) +> (Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1613) + +Change-Id: Ibad7c23988539cc369ecc39dea2ea6985470bee1 +BUG: 1898784 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221193 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/ctx.c | 4 + + libglusterfs/src/dict.c | 166 ++++++++++++++++++++++++++- + libglusterfs/src/globals.c | 2 - + libglusterfs/src/glusterfs/dict.h | 5 + + libglusterfs/src/glusterfs/globals.h | 2 + + libglusterfs/src/libglusterfs.sym | 1 + + xlators/mgmt/glusterd/src/glusterd-handler.c | 39 ++++--- + xlators/mgmt/glusterd/src/glusterd-sm.c | 6 +- + xlators/mgmt/glusterd/src/glusterd-sm.h | 1 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 148 ++++++++++++++---------- + xlators/mgmt/glusterd/src/glusterd-utils.h | 2 +- + xlators/mgmt/glusterd/src/glusterd.h | 8 +- + 12 files changed, 301 insertions(+), 83 deletions(-) + +diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c +index 4a001c2..ae1a77a 100644 +--- a/libglusterfs/src/ctx.c ++++ b/libglusterfs/src/ctx.c +@@ -14,6 +14,7 @@ + #include "glusterfs/glusterfs.h" + #include "timer-wheel.h" + ++glusterfs_ctx_t *global_ctx = NULL; + glusterfs_ctx_t * + glusterfs_ctx_new() + { +@@ -51,6 +52,9 @@ glusterfs_ctx_new() + GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, 0); + GF_ATOMIC_INIT(ctx->stats.total_pairs_used, 0); + GF_ATOMIC_INIT(ctx->stats.total_dicts_used, 0); ++ ++ if (!global_ctx) ++ global_ctx = ctx; + out: + return ctx; + } +diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c +index d8cdda4..e5f619c 100644 +--- a/libglusterfs/src/dict.c ++++ b/libglusterfs/src/dict.c +@@ -56,7 +56,13 @@ struct dict_cmp { + static data_t * + get_new_data() + { +- data_t *data = mem_get(THIS->ctx->dict_data_pool); ++ data_t *data = NULL; ++ ++ if (global_ctx) { ++ data = mem_get(global_ctx->dict_data_pool); ++ } else { ++ data = mem_get(THIS->ctx->dict_data_pool); ++ } + + if (!data) + return NULL; +@@ -3503,3 +3509,161 @@ unlock: + UNLOCK(&dict->lock); + return 0; + } ++ ++/* Popluate specific dictionary on the basis of passed key array at the ++ time of unserialize buffer ++*/ ++int32_t ++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill, ++ char **suffix_key_arr, dict_t **specific_dict, ++ int totkeycount) ++{ ++ char *buf = orig_buf; ++ int ret = -1; ++ int32_t count = 0; ++ int i = 0; ++ int j = 0; ++ ++ data_t *value = NULL; ++ char *key = NULL; ++ int32_t keylen = 0; ++ int32_t vallen = 0; ++ int32_t hostord = 0; ++ xlator_t *this = NULL; ++ int32_t keylenarr[totkeycount]; ++ ++ this = THIS; ++ GF_ASSERT(this); ++ ++ if (!buf) { ++ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, ++ "buf is null!"); ++ goto out; ++ } ++ ++ if (size == 0) { ++ 
gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "size is 0!"); ++ goto out; ++ } ++ ++ if (!fill) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "fill is null!"); ++ goto out; ++ } ++ ++ if (!*fill) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, ++ "*fill is null!"); ++ goto out; ++ } ++ ++ if ((buf + DICT_HDR_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer " ++ "passed. available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + DICT_HDR_LEN)); ++ goto out; ++ } ++ ++ memcpy(&hostord, buf, sizeof(hostord)); ++ count = ntoh32(hostord); ++ buf += DICT_HDR_LEN; ++ ++ if (count < 0) { ++ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO, ++ "count=%d", count, NULL); ++ goto out; ++ } ++ ++ /* Compute specific key length and save in array */ ++ for (i = 0; i < totkeycount; i++) { ++ keylenarr[i] = strlen(suffix_key_arr[i]); ++ } ++ ++ for (i = 0; i < count; i++) { ++ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized " ++ "buffer passed. available (%lu) < " ++ "required (%lu)", ++ (long)(orig_buf + size), ++ (long)(buf + DICT_DATA_HDR_KEY_LEN)); ++ goto out; ++ } ++ memcpy(&hostord, buf, sizeof(hostord)); ++ keylen = ntoh32(hostord); ++ buf += DICT_DATA_HDR_KEY_LEN; ++ ++ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized " ++ "buffer passed. available (%lu) < " ++ "required (%lu)", ++ (long)(orig_buf + size), ++ (long)(buf + DICT_DATA_HDR_VAL_LEN)); ++ goto out; ++ } ++ memcpy(&hostord, buf, sizeof(hostord)); ++ vallen = ntoh32(hostord); ++ buf += DICT_DATA_HDR_VAL_LEN; ++ ++ if ((keylen < 0) || (vallen < 0)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized length passed " ++ "key:%d val:%d", ++ keylen, vallen); ++ goto out; ++ } ++ if ((buf + keylen) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer passed. " ++ "available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + keylen)); ++ goto out; ++ } ++ key = buf; ++ buf += keylen + 1; /* for '\0' */ ++ ++ if ((buf + vallen) > (orig_buf + size)) { ++ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF, ++ "undersized buffer passed. 
" ++ "available (%lu) < required (%lu)", ++ (long)(orig_buf + size), (long)(buf + vallen)); ++ goto out; ++ } ++ value = get_new_data(); ++ ++ if (!value) { ++ ret = -1; ++ goto out; ++ } ++ value->len = vallen; ++ value->data = gf_memdup(buf, vallen); ++ value->data_type = GF_DATA_TYPE_STR_OLD; ++ value->is_static = _gf_false; ++ buf += vallen; ++ ++ ret = dict_addn(*fill, key, keylen, value); ++ if (ret < 0) { ++ data_destroy(value); ++ goto out; ++ } ++ for (j = 0; j < totkeycount; j++) { ++ if (keylen > keylenarr[j]) { ++ if (!strcmp(key + keylen - keylenarr[j], suffix_key_arr[j])) { ++ ret = dict_addn(*specific_dict, key, keylen, value); ++ break; ++ } ++ } ++ } ++ ++ if (ret < 0) ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index e433ee8..30c15b6 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -96,7 +96,6 @@ const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { + /* This global ctx is a bad hack to prevent some of the libgfapi crashes. + * This should be removed once the patch on resource pool is accepted + */ +-glusterfs_ctx_t *global_ctx = NULL; + pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER; + xlator_t global_xlator; + static int gf_global_mem_acct_enable = 1; +@@ -236,7 +235,6 @@ __glusterfs_this_location() + if (*this_location == NULL) { + thread_xlator = &global_xlator; + } +- + return this_location; + } + +diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h +index 8239c7a..6e469c7 100644 +--- a/libglusterfs/src/glusterfs/dict.h ++++ b/libglusterfs/src/glusterfs/dict.h +@@ -423,4 +423,9 @@ dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result); + + int + dict_serialized_length_lk(dict_t *this); ++ ++int32_t ++dict_unserialize_specific_keys(char *orig_buf, int32_t size, dict_t **fill, ++ char **specific_key_arr, dict_t **specific_dict, ++ int totkeycount); + #endif +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index cc145cd..33fb023 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -199,4 +199,6 @@ int + gf_global_mem_acct_enable_get(void); + int + gf_global_mem_acct_enable_set(int val); ++ ++extern glusterfs_ctx_t *global_ctx; + #endif /* !_GLOBALS_H */ +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index d060292..bc770e2 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -436,6 +436,7 @@ dict_clear_flag + dict_check_flag + dict_unref + dict_unserialize ++dict_unserialize_specific_keys + drop_token + eh_destroy + eh_dump +diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c +index b8799ab..908361c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-handler.c ++++ b/xlators/mgmt/glusterd/src/glusterd-handler.c +@@ -86,6 +86,9 @@ glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn) + return ret; + } + ++static char *specific_key_suffix[] = {".quota-cksum", ".ckusm", ".version", ++ ".quota-version", ".name"}; ++ + static int + glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + int port, gd1_mgmt_friend_req *friend_req) +@@ -97,6 +100,8 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + char rhost[UNIX_PATH_MAX + 1] = {0}; + uuid_t friend_uuid = {0}; + dict_t *dict = NULL; ++ dict_t *peer_ver = 
NULL; ++ int totcount = sizeof(specific_key_suffix) / sizeof(specific_key_suffix[0]); + + gf_uuid_parse(uuid_utoa(uuid), friend_uuid); + if (!port) +@@ -104,8 +109,19 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + + ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost)); + ++ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); ++ dict = dict_new(); ++ peer_ver = dict_new(); ++ + RCU_READ_LOCK; + ++ if (!ctx || !dict || !peer_ver) { ++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Unable to allocate memory"); ++ ret = -1; ++ goto out; ++ } ++ + peerinfo = glusterd_peerinfo_find(uuid, rhost); + + if (peerinfo == NULL) { +@@ -130,28 +146,14 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + event->peername = gf_strdup(peerinfo->hostname); + gf_uuid_copy(event->peerid, peerinfo->uuid); + +- ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t); +- +- if (!ctx) { +- gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, +- "Unable to allocate memory"); +- ret = -1; +- goto out; +- } +- + gf_uuid_copy(ctx->uuid, uuid); + if (hostname) + ctx->hostname = gf_strdup(hostname); + ctx->req = req; + +- dict = dict_new(); +- if (!dict) { +- ret = -1; +- goto out; +- } +- +- ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len, +- &dict); ++ ret = dict_unserialize_specific_keys( ++ friend_req->vols.vols_val, friend_req->vols.vols_len, &dict, ++ specific_key_suffix, &peer_ver, totcount); + + if (ret) + goto out; +@@ -159,6 +161,7 @@ glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname, + dict->extra_stdfree = friend_req->vols.vols_val; + + ctx->vols = dict; ++ ctx->peer_ver = peer_ver; + event->ctx = ctx; + + ret = glusterd_friend_sm_inject_event(event); +@@ -188,6 +191,8 @@ out: + } else { + free(friend_req->vols.vols_val); + } ++ if (peer_ver) ++ dict_unref(peer_ver); + if (event) + GF_FREE(event->peername); + GF_FREE(event); +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c +index 044da3d..d10a792 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.c +@@ -106,6 +106,8 @@ glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx) + + if (ctx->vols) + dict_unref(ctx->vols); ++ if (ctx->peer_ver) ++ dict_unref(ctx->peer_ver); + GF_FREE(ctx->hostname); + GF_FREE(ctx); + } +@@ -936,8 +938,8 @@ glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx) + // Build comparison logic here. 
+ pthread_mutex_lock(&conf->import_volumes); + { +- ret = glusterd_compare_friend_data(ev_ctx->vols, &status, +- event->peername); ++ ret = glusterd_compare_friend_data(ev_ctx->vols, ev_ctx->peer_ver, ++ &status, event->peername); + if (ret) { + pthread_mutex_unlock(&conf->import_volumes); + goto out; +diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h +index ce008ac..efdf68e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-sm.h ++++ b/xlators/mgmt/glusterd/src/glusterd-sm.h +@@ -174,6 +174,7 @@ typedef struct glusterd_friend_req_ctx_ { + rpcsvc_request_t *req; + int port; + dict_t *vols; ++ dict_t *peer_ver; // Dictionary to save peer ver data + } glusterd_friend_req_ctx_t; + + typedef struct glusterd_friend_update_ctx_ { +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index f7030fb..cf32bd9 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -3709,12 +3709,14 @@ out: + return ret; + } + +-int32_t +-glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, +- int32_t *status, char *hostname) ++static int32_t ++glusterd_compare_friend_volume(dict_t *peer_data, ++ glusterd_friend_synctask_args_t *arg, ++ int32_t count, int32_t *status, char *hostname) + { + int32_t ret = -1; + char key[64] = ""; ++ char key_prefix[32]; + int keylen; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; +@@ -3726,15 +3728,20 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + xlator_t *this = NULL; + + GF_ASSERT(peer_data); ++ GF_ASSERT(arg); + GF_ASSERT(status); + + this = THIS; + GF_ASSERT(this); + +- keylen = snprintf(key, sizeof(key), "volume%d.name", count); +- ret = dict_get_strn(peer_data, key, keylen, &volname); +- if (ret) ++ snprintf(key_prefix, sizeof(key_prefix), "volume%d", count); ++ keylen = snprintf(key, sizeof(key), "%s.name", key_prefix); ++ ret = dict_get_strn(arg->peer_ver_data, key, keylen, &volname); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + ret = glusterd_volinfo_find(volname, &volinfo); + if (ret) { +@@ -3750,10 +3757,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + goto out; + } + +- keylen = snprintf(key, sizeof(key), "volume%d.version", count); +- ret = dict_get_int32n(peer_data, key, keylen, &version); +- if (ret) ++ keylen = snprintf(key, sizeof(key), "%s.version", key_prefix); ++ ret = dict_get_int32n(arg->peer_ver_data, key, keylen, &version); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + if (version > volinfo->version) { + // Mismatch detected +@@ -3772,10 +3782,13 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + + // Now, versions are same, compare cksums. 
+ // +- snprintf(key, sizeof(key), "volume%d.ckusm", count); +- ret = dict_get_uint32(peer_data, key, &cksum); +- if (ret) ++ snprintf(key, sizeof(key), "%s.ckusm", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, &cksum); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s is NULL in peer_ver_data", key, NULL); + goto out; ++ } + + if (cksum != volinfo->cksum) { + ret = 0; +@@ -3790,8 +3803,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + goto skip_quota; + +- snprintf(key, sizeof(key), "volume%d.quota-version", count); +- ret = dict_get_uint32(peer_data, key, "a_version); ++ snprintf(key, sizeof(key), "%s.quota-version", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, "a_version); + if (ret) { + gf_msg_debug(this->name, 0, + "quota-version key absent for" +@@ -3809,6 +3822,7 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + "%d on peer %s", + volinfo->volname, volinfo->quota_conf_version, quota_version, + hostname); ++ GF_ATOMIC_INIT(volinfo->volpeerupdate, 1); + *status = GLUSTERD_VOL_COMP_UPDATE_REQ; + goto out; + } else if (quota_version < volinfo->quota_conf_version) { +@@ -3819,8 +3833,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, + + // Now, versions are same, compare cksums. + // +- snprintf(key, sizeof(key), "volume%d.quota-cksum", count); +- ret = dict_get_uint32(peer_data, key, "a_cksum); ++ snprintf(key, sizeof(key), "%s.quota-cksum", key_prefix); ++ ret = dict_get_uint32(arg->peer_ver_data, key, "a_cksum); + if (ret) { + gf_msg_debug(this->name, 0, + "quota checksum absent for " +@@ -3846,13 +3860,12 @@ skip_quota: + *status = GLUSTERD_VOL_COMP_SCS; + + out: +- keylen = snprintf(key, sizeof(key), "volume%d.update", count); +- + if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) { +- ret = dict_set_int32n(peer_data, key, keylen, 1); +- } else { +- ret = dict_set_int32n(peer_data, key, keylen, 0); ++ /*Set the status to ensure volume is updated on the peer ++ */ ++ arg->status_arr[(count / 64)] ^= 1UL << (count % 64); + } ++ + if (*status == GLUSTERD_VOL_COMP_RJT) { + gf_event(EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s", + volinfo->volname); +@@ -4935,8 +4948,9 @@ out: + return ret; + } + +-int32_t +-glusterd_import_friend_volume(dict_t *peer_data, int count) ++static int32_t ++glusterd_import_friend_volume(dict_t *peer_data, int count, ++ glusterd_friend_synctask_args_t *arg) + { + int32_t ret = -1; + glusterd_conf_t *priv = NULL; +@@ -4954,10 +4968,27 @@ glusterd_import_friend_volume(dict_t *peer_data, int count) + priv = this->private; + GF_ASSERT(priv); + +- ret = snprintf(key, sizeof(key), "volume%d.update", count); +- ret = dict_get_int32n(peer_data, key, ret, &update); +- if (ret || !update) { ++ if (arg) { ++ /*Check if the volume options are updated on the other peers ++ */ ++ update = (1UL & (arg->status_arr[(count / 64)] >> (count % 64))); ++ } else { ++ ret = snprintf(key, sizeof(key), "volume%d.update", count); ++ ret = dict_get_int32n(peer_data, key, ret, &update); ++ if (ret) { ++ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, ++ "Key=%s", key, NULL); ++ goto out; ++ } ++ } ++ ++ if (!update) { + /* if update is 0 that means the volume is not imported */ ++ gf_log(this->name, GF_LOG_DEBUG, ++ "The volume%d does" ++ " not have any peer change", ++ count); ++ ret = 0; + goto out; + } + +@@ -5045,6 +5076,8 @@ glusterd_import_friend_volumes_synctask(void 
*opaque) + glusterd_conf_t *conf = NULL; + dict_t *peer_data = NULL; + glusterd_friend_synctask_args_t *arg = NULL; ++ uint64_t bm = 0; ++ uint64_t mask = 0; + + this = THIS; + GF_ASSERT(this); +@@ -5056,17 +5089,7 @@ glusterd_import_friend_volumes_synctask(void *opaque) + if (!arg) + goto out; + +- peer_data = dict_new(); +- if (!peer_data) { +- goto out; +- } +- +- ret = dict_unserialize(arg->dict_buf, arg->dictlen, &peer_data); +- if (ret) { +- errno = ENOMEM; +- goto out; +- } +- ++ peer_data = arg->peer_data; + ret = dict_get_int32n(peer_data, "count", SLEN("count"), &count); + if (ret) + goto out; +@@ -5083,11 +5106,18 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf->restart_bricks = _gf_true; + + while (i <= count) { +- ret = glusterd_import_friend_volume(peer_data, i); +- if (ret) { +- break; ++ bm = arg->status_arr[i / 64]; ++ while (bm != 0) { ++ /* mask will contain the lowest bit set from bm. */ ++ mask = bm & (-bm); ++ bm ^= mask; ++ ret = glusterd_import_friend_volume(peer_data, i + ffsll(mask) - 2, ++ arg); ++ if (ret < 0) { ++ break; ++ } + } +- i++; ++ i += 64; + } + if (i > count) { + glusterd_svcs_manager(NULL); +@@ -5095,11 +5125,9 @@ glusterd_import_friend_volumes_synctask(void *opaque) + conf->restart_bricks = _gf_false; + synccond_broadcast(&conf->cond_restart_bricks); + out: +- if (peer_data) +- dict_unref(peer_data); + if (arg) { +- if (arg->dict_buf) +- GF_FREE(arg->dict_buf); ++ dict_unref(arg->peer_data); ++ dict_unref(arg->peer_ver_data); + GF_FREE(arg); + } + +@@ -5121,7 +5149,7 @@ glusterd_import_friend_volumes(dict_t *peer_data) + goto out; + + while (i <= count) { +- ret = glusterd_import_friend_volume(peer_data, i); ++ ret = glusterd_import_friend_volume(peer_data, i, NULL); + if (ret) + goto out; + i++; +@@ -5260,7 +5288,8 @@ out: + } + + int32_t +-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) ++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, ++ char *hostname) + { + int32_t ret = -1; + int32_t count = 0; +@@ -5289,8 +5318,19 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + if (ret) + goto out; + ++ arg = GF_CALLOC(1, sizeof(*arg) + sizeof(uint64_t) * (count / 64), ++ gf_common_mt_char); ++ if (!arg) { ++ ret = -1; ++ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, ++ "Out Of Memory"); ++ goto out; ++ } ++ arg->peer_data = dict_ref(peer_data); ++ arg->peer_ver_data = dict_ref(cmp); + while (i <= count) { +- ret = glusterd_compare_friend_volume(peer_data, i, status, hostname); ++ ret = glusterd_compare_friend_volume(peer_data, arg, i, status, ++ hostname); + if (ret) + goto out; + +@@ -5310,21 +5350,13 @@ glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, char *hostname) + * first brick to come up before attaching the subsequent bricks + * in case brick multiplexing is enabled + */ +- arg = GF_CALLOC(1, sizeof(*arg), gf_common_mt_char); +- ret = dict_allocate_and_serialize(peer_data, &arg->dict_buf, +- &arg->dictlen); +- if (ret < 0) { +- gf_log(this->name, GF_LOG_ERROR, +- "dict_serialize failed while handling " +- " import friend volume request"); +- goto out; +- } +- + glusterd_launch_synctask(glusterd_import_friend_volumes_synctask, arg); + } + + out: + if (ret && arg) { ++ dict_unref(arg->peer_data); ++ dict_unref(arg->peer_ver_data); + GF_FREE(arg); + } + gf_msg_debug(this->name, 0, "Returning with ret: %d, status: %d", ret, +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h 
b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 5f5de82..02d85d2 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -231,7 +231,7 @@ glusterd_add_volumes_to_export_dict(dict_t *peer_data, char **buf, + u_int *length); + + int32_t +-glusterd_compare_friend_data(dict_t *peer_data, int32_t *status, ++glusterd_compare_friend_data(dict_t *peer_data, dict_t *cmp, int32_t *status, + char *hostname); + + int +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index f739b5d..efe4d0e 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -234,8 +234,12 @@ typedef struct glusterd_add_dict_args { + } glusterd_add_dict_args_t; + + typedef struct glusterd_friend_synctask_args { +- char *dict_buf; +- u_int dictlen; ++ dict_t *peer_data; ++ dict_t *peer_ver_data; // Dictionary to save peer ver data ++ /* This status_arr[1] is not a real size, real size of the array ++ is dynamically allocated ++ */ ++ uint64_t status_arr[1]; + } glusterd_friend_synctask_args_t; + + typedef enum gf_brick_status { +-- +1.8.3.1 + diff --git a/SOURCES/0511-features-shard-Missing-format-specifier.patch b/SOURCES/0511-features-shard-Missing-format-specifier.patch new file mode 100644 index 0000000..baf6cf4 --- /dev/null +++ b/SOURCES/0511-features-shard-Missing-format-specifier.patch @@ -0,0 +1,39 @@ +From 868d346cc35c222d19b95bd9c367674c9ea859df Mon Sep 17 00:00:00 2001 +From: Vinayakswami Hariharmath <vharihar@redhat.com> +Date: Tue, 15 Dec 2020 16:23:49 +0530 +Subject: [PATCH 511/511] features/shard: Missing format specifier + +The PRIu64 format specifier explicitly needs % (percent sign) as +prefix, and that was missing as part of the below commit on +downstream + +https://code.engineering.redhat.com/gerrit/#/c/221061/ + +BUG: 1752739 +Change-Id: I354de58796f350eb1aa42fcdf8092ca2e69ccbb6 + +Signed-off-by: Vinayakswami Hariharmath <vharihar@redhat.com> +Change-Id: I4598893e3fcca3a2b3e6e8ef9b64b3e5e98923e6 +Reviewed-on: https://code.engineering.redhat.com/gerrit/221217 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + xlators/features/shard/src/shard.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c +index a967f35..099b062 100644 +--- a/xlators/features/shard/src/shard.c ++++ b/xlators/features/shard/src/shard.c +@@ -1855,7 +1855,7 @@ int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + */ + if (!inode) { + gf_msg_debug(this->name, 0, +- "Last shard to be truncated absent in backend: " PRIu64 ++ "Last shard to be truncated absent in backend:%" PRIu64 + " of gfid: %s.
Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, +-- +1.8.3.1 + diff --git a/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch b/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch new file mode 100644 index 0000000..37de503 --- /dev/null +++ b/SOURCES/0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch @@ -0,0 +1,105 @@ +From c963653a89c3f6466af9a3e8f19246a7907f7f8c Mon Sep 17 00:00:00 2001 +From: nik-redhat <nladha@redhat.com> +Date: Thu, 30 Jul 2020 13:04:52 +0530 +Subject: [PATCH 512/517] glusterd: shared storage mount fails in ipv6 + environment + +Issue: +In case of an ipv6 environment, the mounting of the glusterd_shared_storage +volume fails as it doesn't recognise the ipv6 environment. + +Fix: +In case of an ipv6 environment, the address-family is passed +to the hooks script on creating shared-storage; then, depending +upon the address-family, the --xlator-option=transport.address-family=inet6 +option is added to the mount command, and the mounting succeeds. + +>Fixes: #1406 +> +>Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d +>Signed-off-by: nik-redhat <nladha@redhat.com> + +Upstream patch: https://review.gluster.org/c/glusterfs/+/24797 +BUG: 1856574 + +Change-Id: Ib1888c34d85e6c01618b0ba214cbe1f57576908d +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/221844 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Srijan Sivakumar <ssivakum@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + .../set/post/S32gluster_enable_shared_storage.sh | 11 +++++++++-- + xlators/mgmt/glusterd/src/glusterd-hooks.c | 19 +++++++++++++++++++ + 2 files changed, 28 insertions(+), 2 deletions(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 3bae37c..9597503 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -104,8 +104,15 @@ function check_volume_status() + echo $status + } + +-mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ +- /run/gluster/shared_storage" ++key=`echo $5 | cut -d '=' -f 1` ++val=`echo $5 | cut -d '=' -f 2` ++if [ "$key" == "transport.address-family" ]; then ++ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \ ++ $local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage" ++else ++ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ ++ /var/run/gluster/shared_storage" ++fi + + if [ "$option" == "enable" ]; then + retry=0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c +index 216cdf7..4f0d775 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c ++++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c +@@ -200,11 +200,16 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) + int i = 0; + int count = 0; + int ret = -1; ++ int flag = 0; + char query[1024] = { + 0, + }; + char *key = NULL; + char *value = NULL; ++ char *inet_family = NULL; ++ xlator_t *this = NULL; ++ this = THIS; ++ GF_ASSERT(this); + + ret = dict_get_int32(dict, "count", &count); + if (ret) +@@ -228,9 +233,23 @@ glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner) + continue; + + 
runner_argprintf(runner, "%s=%s", key, value); ++ if ((strncmp(key, "cluster.enable-shared-storage", ++ SLEN("cluster.enable-shared-storage")) == 0 || ++ strncmp(key, "enable-shared-storage", ++ SLEN("enable-shared-storage")) == 0) && ++ strncmp(value, "enable", SLEN("enable")) == 0) ++ flag = 1; + } + + glusterd_hooks_add_custom_args(dict, runner); ++ if (flag == 1) { ++ ret = dict_get_str_sizen(this->options, "transport.address-family", ++ &inet_family); ++ if (!ret) { ++ runner_argprintf(runner, "transport.address-family=%s", ++ inet_family); ++ } ++ } + + ret = 0; + out: +-- +1.8.3.1 + diff --git a/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch b/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch new file mode 100644 index 0000000..ebd5609 --- /dev/null +++ b/SOURCES/0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch @@ -0,0 +1,191 @@ +From 708c17a8a69b2657f384affaedfcf4ba0a123893 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Wed, 23 Dec 2020 14:45:07 +0530 +Subject: [PATCH 513/517] afr: mark pending xattrs as a part of metadata heal + +...if pending xattrs are zero for all children. + +Problem: +If there are no pending xattrs and a metadata heal needs to be +performed, it is possible that we end up with xattrs inadvertently +deleted from all bricks, as explained in the BZ. + +Fix: +After picking one among the sources as the good copy, mark pending xattrs on +all sources to blame the sinks. Now even if this metadata heal fails midway, +a subsequent heal will still choose one of the valid sources that it +picked previously. + +Upstream patch details: +> Fixes: #1067 +> Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/21922/ + +BUG: 1640148 +Change-Id: If1b050b70b0ad911e162c04db4d89b263e2b8d7b +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/222073 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + tests/bugs/replicate/mdata-heal-no-xattrs.t | 59 ++++++++++++++++++++++ + xlators/cluster/afr/src/afr-self-heal-metadata.c | 62 +++++++++++++++++++++++- + 2 files changed, 120 insertions(+), 1 deletion(-) + create mode 100644 tests/bugs/replicate/mdata-heal-no-xattrs.t + +diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t +new file mode 100644 +index 0000000..d3b0c50 +--- /dev/null ++++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t +@@ -0,0 +1,59 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; ++TEST $CLI volume set $V0 cluster.self-heal-daemon off ++TEST $CLI volume start $V0 ++ ++TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 ++echo "Data">$M0/FILE ++ret=$? ++TEST [ $ret -eq 0 ] ++ ++# Change permission on brick-0: simulates the case where there is metadata ++# mismatch but no pending xattrs. This brick will become the source for heal.
++TEST chmod +x $B0/$V0"0"/FILE ++ ++# Add gfid to xattrop ++xattrop_b0=$(afr_get_index_path $B0/$V0"0") ++base_entry_b0=`ls $xattrop_b0` ++gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE)) ++TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE ++EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0 ++ ++TEST $CLI volume set $V0 cluster.self-heal-daemon on ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++ ++# Brick-0 should contain xattrs blaming other 2 bricks. ++# The values will be zero because heal is over. ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE ++EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE ++ ++# Brick-1 and Brick-2 must not contain any afr xattrs. ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE ++TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE ++ ++# check permission bits. ++EXPECT '755' stat -c %a $B0/${V0}0/FILE ++EXPECT '755' stat -c %a $B0/${V0}1/FILE ++EXPECT '755' stat -c %a $B0/${V0}2/FILE ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++cleanup; +diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c +index f4e31b6..03f43ba 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c ++++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c +@@ -190,6 +190,59 @@ out: + return ret; + } + ++static int ++__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this, ++ inode_t *inode, ++ struct afr_reply *replies, ++ unsigned char *sources) ++{ ++ int ret = 0; ++ int i = 0; ++ int m_idx = 0; ++ afr_private_t *priv = NULL; ++ int raw[AFR_NUM_CHANGE_LOGS] = {0}; ++ dict_t *xattr = NULL; ++ ++ priv = this->private; ++ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION); ++ raw[m_idx] = 1; ++ ++ xattr = dict_new(); ++ if (!xattr) ++ return -ENOMEM; ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (sources[i]) ++ continue; ++ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw, ++ sizeof(int) * AFR_NUM_CHANGE_LOGS); ++ if (ret) { ++ ret = -1; ++ goto out; ++ } ++ } ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!sources[i]) ++ continue; ++ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO, ++ "Failed to set pending metadata xattr on child %d for %s", i, ++ uuid_utoa(inode->gfid)); ++ goto out; ++ } ++ } ++ ++ afr_replies_wipe(replies, priv->child_count); ++ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies); ++ ++out: ++ if (xattr) ++ dict_unref(xattr); ++ return ret; ++} ++ + /* + * Look for mismatching uid/gid or mode or user xattrs even if + * AFR xattrs don't say so, and pick one arbitrarily as winner. 
*/ +@@ -210,6 +263,7 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + }; + int source = -1; + int sources_count = 0; ++ int ret = 0; + + priv = this->private; + +@@ -300,7 +354,13 @@ __afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this, + healed_sinks[i] = 1; + } + } +- ++ if ((sources_count == priv->child_count) && (source > -1) && ++ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) { ++ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode, ++ replies, sources); ++ if (ret < 0) ++ return ret; ++ } + out: + afr_mark_active_sinks(this, sources, locked_on, healed_sinks); + return source; +-- +1.8.3.1 + diff --git a/SOURCES/0514-afr-event-gen-changes.patch b/SOURCES/0514-afr-event-gen-changes.patch new file mode 100644 index 0000000..9f9562e --- /dev/null +++ b/SOURCES/0514-afr-event-gen-changes.patch @@ -0,0 +1,308 @@ +From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Wed, 23 Dec 2020 14:57:51 +0530 +Subject: [PATCH 514/517] afr: event gen changes + +The general idea of the changes is to prevent resetting event generation +to zero in the inode ctx, since event gen is something that should +follow 'causal order'. + +Change #1: +For a read txn, in inode refresh cbk, if event_generation is +found zero, we are failing the read fop. This is not needed +because a change in event gen is only a marker for the next inode refresh to +happen and should not be taken into account by the current read txn. + +Change #2: +The event gen being zero above can happen if there is a racing lookup, +which resets event gen (in afr_lookup_done) if there are non zero afr +xattrs. The resetting is done only to trigger an inode refresh and a +possible client side heal on the next lookup. That can be achieved by +setting the need_refresh flag in the inode ctx. So replaced all +occurrences of resetting event gen to zero with a call to +afr_inode_need_refresh_set(). + +Change #3: +In both lookup and discover path, we are doing an inode refresh which is +not required since all 3 essentially do the same thing - update the inode +ctx with the good/bad copies from the brick replies. Inode refresh also +triggers background heals, but I think it is okay to do it when we call +refresh during the read and write txns and not in the lookup path. + +The .t tests which relied on inode refresh in lookup path to trigger heals are +now changed to do a read txn so that inode refresh and the heal happen.
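To make Change #2 concrete, a small self-contained sketch follows (illustrative only, not part of the patch; pack_read_subvol() and the demo values are invented, but the bit layout is copied verbatim from the __afr_inode_event_gen_reset_small() hunk removed below): AFR packs the readable maps and the event generation into one 64-bit word in the inode ctx, so zeroing the event field destroys the causal ordering that in-flight read txns rely on, while merely setting need_refresh does not.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: same packing as ctx->read_subvol in the hunks
 * below - low 16 bits = metadata readable map, next 16 bits = data
 * readable map, top 32 bits = event generation. */
static uint64_t
pack_read_subvol(uint16_t metadatamap, uint16_t datamap, uint32_t event)
{
    return ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
           (((uint64_t)event) << 32);
}

int
main(void)
{
    uint64_t val = pack_read_subvol(0x3, 0x3, 7);
    /* Zeroing the event field (the old behaviour) would make an
     * in-flight read txn observe event gen 0 and fail the fop;
     * setting need_refresh leaves this value intact. */
    printf("event gen = %u\n", (uint32_t)(val >> 32)); /* prints 7 */
    return 0;
}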
+ +Upstream patch details: +> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec +> Fixes: #1179 +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> +> Reported-by: Erik Jacobson <erik.jacobson at hpe.com> +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/ + +BUG: 1640148 +Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/222074 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + ...fid-mismatch-resolution-with-fav-child-policy.t | 8 +- + xlators/cluster/afr/src/afr-common.c | 92 +++++----------------- + xlators/cluster/afr/src/afr-dir-write.c | 6 +- + xlators/cluster/afr/src/afr.h | 5 +- + 4 files changed, 29 insertions(+), 82 deletions(-) + +diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +index f4aa351..12af0c8 100644 +--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t ++++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ] + #We know that second brick has the bigger size file + BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1) + +-TEST ls $M0/f3 +-TEST cat $M0/f3 ++TEST ls $M0 #Trigger entry heal via readdir inode refresh ++TEST cat $M0/f3 #Trigger data heal via readv inode refresh + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + #gfid split-brain should be resolved +@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 + EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +-TEST ls $M0/f4 +-TEST cat $M0/f4 ++TEST ls $M0 #Trigger entry heal via readdir inode refresh ++TEST cat $M0/f4 #Trigger data heal via readv inode refresh + EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + #gfid split-brain should be resolved +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index fca2cd5..90b4f14 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + metadatamap |= (1 << index); + } + if (metadatamap_old != metadatamap) { +- event = 0; ++ __afr_inode_need_refresh_set(inode, this); + } + break; + +@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local, + datamap |= (1 << index); + } + if (datamap_old != datamap) +- event = 0; ++ __afr_inode_need_refresh_set(inode, this); + break; + + default: +@@ -461,34 +461,6 @@ out: + } + + int +-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this) +-{ +- int ret = -1; +- uint16_t datamap = 0; +- uint16_t metadatamap = 0; +- uint32_t event = 0; +- uint64_t val = 0; +- afr_inode_ctx_t *ctx = NULL; +- +- ret = __afr_inode_ctx_get(this, inode, &ctx); +- if (ret) +- return ret; +- +- val = ctx->read_subvol; +- +- metadatamap = (val & 0x000000000000ffff) >> 0; +- datamap = (val & 0x00000000ffff0000) >> 16; +- event = 0; +- +- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) | +- (((uint64_t)event) << 32); +- +- ctx->read_subvol = val; +- +- return ret; +-} +- +-int + __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) + { +@@ -559,22 +531,6 @@ out: + } + + int 
+-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) +-{ +- afr_private_t *priv = NULL; +- int ret = -1; +- +- priv = this->private; +- +- if (priv->child_count <= 16) +- ret = __afr_inode_event_gen_reset_small(inode, this); +- else +- ret = -1; +- +- return ret; +-} +- +-int + afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data, + unsigned char *metadata, int *event_p) + { +@@ -723,30 +679,22 @@ out: + return need_refresh; + } + +-static int +-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) ++int ++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) + { + int ret = -1; + afr_inode_ctx_t *ctx = NULL; + +- GF_VALIDATE_OR_GOTO(this->name, inode, out); +- +- LOCK(&inode->lock); +- { +- ret = __afr_inode_ctx_get(this, inode, &ctx); +- if (ret) +- goto unlock; +- ++ ret = __afr_inode_ctx_get(this, inode, &ctx); ++ if (ret == 0) { + ctx->need_refresh = _gf_true; + } +-unlock: +- UNLOCK(&inode->lock); +-out: ++ + return ret; + } + + int +-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) ++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this) + { + int ret = -1; + +@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this) + + LOCK(&inode->lock); + { +- ret = __afr_inode_event_gen_reset(inode, this); ++ ret = __afr_inode_need_refresh_set(inode, this); + } + UNLOCK(&inode->lock); + out: +@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err) + ret = afr_inode_get_readable(frame, inode, this, local->readable, + &event_generation, local->transaction.type); + +- if (ret == -EIO || (local->is_read_txn && !event_generation)) { ++ if (ret == -EIO) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = EIO; +@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + if (read_subvol == -1) + goto cant_interpret; + if (ret) { +- afr_inode_event_gen_reset(local->inode, this); ++ afr_inode_need_refresh_set(local->inode, this); + dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY); + } + } else { +@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int read_subvol = -1; ++ int ret = 0; + unsigned char *data_readable = NULL; + unsigned char *success_replies = NULL; + +@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this) + if (!afr_has_quorum(success_replies, this, frame)) + goto unwind; + +- afr_replies_interpret(frame, this, local->inode, NULL); ++ ret = afr_replies_interpret(frame, this, local->inode, NULL); ++ if (ret) { ++ afr_inode_need_refresh_set(local->inode, this); ++ } + + read_subvol = afr_read_subvol_decide(local->inode, this, NULL, + data_readable); +@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + afr_read_subvol_get(loc->inode, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); + +- if (afr_is_inode_refresh_reqd(loc->inode, this, event, +- local->event_generation)) +- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do); +- else +- afr_discover_do(frame, this, 0); ++ afr_discover_do(frame, this, 0); + + return 0; + out: +@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + afr_read_subvol_get(loc->parent, this, NULL, NULL, &event, + AFR_DATA_TRANSACTION, NULL); + +- if (afr_is_inode_refresh_reqd(loc->inode, this, event, +- local->event_generation)) +- 
afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do); +- else +- afr_lookup_do(frame, this, 0); ++ afr_lookup_do(frame, this, 0); + + return 0; + out: +diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c +index 416c19d..d419bfc 100644 +--- a/xlators/cluster/afr/src/afr-dir-write.c ++++ b/xlators/cluster/afr/src/afr-dir-write.c +@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this) + continue; + if (local->replies[i].op_ret < 0) { + if (local->inode) +- afr_inode_event_gen_reset(local->inode, this); ++ afr_inode_need_refresh_set(local->inode, this); + if (local->parent) +- afr_inode_event_gen_reset(local->parent, this); ++ afr_inode_need_refresh_set(local->parent, this); + if (local->parent2) +- afr_inode_event_gen_reset(local->parent2, this); ++ afr_inode_need_refresh_set(local->parent2, this); + continue; + } + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index ed5096e..3a2b26d 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, + int event_generation); + + int +-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this); ++__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); ++ ++int ++afr_inode_need_refresh_set(inode_t *inode, xlator_t *this); + + int + afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this, +-- +1.8.3.1 + diff --git a/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch b/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch new file mode 100644 index 0000000..9c7693a --- /dev/null +++ b/SOURCES/0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch @@ -0,0 +1,2155 @@ +From aab8a587360214432c4a2ab59134411f1d38c509 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Wed, 9 Dec 2020 10:46:31 +0530 +Subject: [PATCH 515/517] cluster/afr: Heal directory rename without + rmdir/mkdir + +Problem1: +When a directory is renamed while a brick +is down, entry-heal always did an rm -rf on that directory on +the sink at the old location and then a mkdir to create the directory +hierarchy again in the new location. This is inefficient. + +Problem2: +Rename-dir heal order may lead to a scenario where the directory in +the new location could be created before it is deleted from the old +location, leading to 2 directories with the same gfid in posix. + +Fix: +As part of heal, if the old location is healed first and is not present in +the source-brick, always rename it into a hidden directory inside the +sink-brick so that when heal is triggered in the new-location, shd can +rename it from this hidden directory to the new-location. + +If the new-location heal is triggered first and it detects that the +directory already exists in the brick, then it should skip healing the +directory until it appears in the hidden directory.
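Before the patch body, a rough self-contained model of the ordering the Fix paragraphs describe (every name in this sketch is invented for illustration; the real logic lives in afr-self-heal-entry.c further below): the old-location heal parks the directory under the hidden anonymous-inode directory instead of doing rm -rf, and the new-location heal renames it out of that directory, so a given gfid is never instantiated twice on the sink.

#include <stdio.h>

/* Invented model: a sink-brick directory is either at a path or
 * parked under the hidden anonymous-inode directory. */
struct sink_dir {
    char path[64]; /* location on the sink; "" while parked */
    int parked;    /* 1 = under the hidden directory */
};

/* Old-location heal: the entry is gone on the source, so park the
 * inode (a rename into the hidden directory) instead of rm -rf. */
static void
heal_old_location(struct sink_dir *d)
{
    d->parked = 1;
    d->path[0] = '\0';
}

/* New-location heal: if the inode is parked, rename it into place;
 * otherwise skip and let a later heal pass retry, so the same gfid
 * is never created a second time. */
static int
heal_new_location(struct sink_dir *d, const char *newpath)
{
    if (!d->parked)
        return -1; /* skip for now */
    snprintf(d->path, sizeof(d->path), "%s", newpath);
    d->parked = 0;
    return 0;
}

int
main(void)
{
    struct sink_dir d = {"old/dir", 0};
    heal_old_location(&d);                  /* park under the hidden dir */
    if (heal_new_location(&d, "new/dir") == 0)
        printf("healed by rename: %s\n", d.path);
    return 0;
}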
+ +Credits: Ravi for rename-data-loss.t script + +Upstream patch details: +> Fixes: #1211 +> Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24373/ + +BUG: 1640148 +Change-Id: I0cba2006f35cd03d314d18211ce0bd530e254843 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/220660 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + tests/afr.rc | 16 + + tests/basic/afr/afr-anon-inode-no-quorum.t | 63 ++++ + tests/basic/afr/afr-anon-inode.t | 114 ++++++ + tests/basic/afr/entry-self-heal-anon-dir-off.t | 464 ++++++++++++++++++++++++ + tests/basic/afr/rename-data-loss.t | 72 ++++ + tests/bugs/replicate/bug-1744548-heal-timeout.t | 6 +- + tests/features/trash.t | 74 ++-- + xlators/cluster/afr/src/afr-common.c | 46 ++- + xlators/cluster/afr/src/afr-dir-read.c | 12 +- + xlators/cluster/afr/src/afr-self-heal-common.c | 182 ++++++++++ + xlators/cluster/afr/src/afr-self-heal-entry.c | 206 +++++++++-- + xlators/cluster/afr/src/afr-self-heal-name.c | 33 +- + xlators/cluster/afr/src/afr-self-heal.h | 5 + + xlators/cluster/afr/src/afr-self-heald.c | 178 ++++++++- + xlators/cluster/afr/src/afr-self-heald.h | 2 +- + xlators/cluster/afr/src/afr.c | 40 +- + xlators/cluster/afr/src/afr.h | 11 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 39 ++ + xlators/mgmt/glusterd/src/glusterd-volume-set.c | 6 + + 19 files changed, 1442 insertions(+), 127 deletions(-) + create mode 100644 tests/basic/afr/afr-anon-inode-no-quorum.t + create mode 100644 tests/basic/afr/afr-anon-inode.t + create mode 100644 tests/basic/afr/entry-self-heal-anon-dir-off.t + create mode 100644 tests/basic/afr/rename-data-loss.t + +diff --git a/tests/afr.rc b/tests/afr.rc +index 35f352d..2417899 100644 +--- a/tests/afr.rc ++++ b/tests/afr.rc +@@ -105,3 +105,19 @@ function get_quorum_type() + local repl_id="$3" + cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}' + } ++ ++function afr_private_key_value() ++{ ++ local v=$1 ++ local m=$2 ++ local replica_id=$3 ++ local key=$4 ++#xargs at the end will strip leading spaces ++ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs ++} ++ ++function afr_anon_entry_count() ++{ ++ local b=$1 ++ ls $b/.glusterfs-anonymous-inode* | wc -l ++} +diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t +new file mode 100644 +index 0000000..896ba0c +--- /dev/null ++++ b/tests/basic/afr/afr-anon-inode-no-quorum.t +@@ -0,0 +1,63 @@ ++#!/bin/bash ++ ++#Test that anon-inode entry is not cleaned up as long as there exists at least ++#one valid entry ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 cluster.entry-self-heal off ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 ++ ++TEST touch $M0/a $M0/b ++ ++gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) ++gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST mv $M0/a $M0/a-new ++TEST mv $M0/b $M0/b-new ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++TEST ! ls $M0/a ++TEST ! ls $M0/b ++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b ++#Make sure index heal doesn't happen after enabling heal ++TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 ++TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* ++TEST $CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++#Allow time for a scan ++sleep 5 ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a ++TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b ++inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) ++TEST rm -f $M0/a-new ++TEST stat $M0/b-new ++ ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 ++EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new ++ ++cleanup +diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t +new file mode 100644 +index 0000000..f4cf37a +--- /dev/null ++++ b/tests/basic/afr/afr-anon-inode.t +@@ -0,0 +1,114 @@ ++#!/bin/bash ++#Tests that afr-anon-inode test cases work fine as expected ++#These are cases where in entry-heal/name-heal we dont know entry for an inode ++#so these inodes are kept in a special directory ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume start $V0 ++TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; ++EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST $CLI volume set $V0 cluster.use-anonymous-inode no ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST $CLI volume set $V0 cluster.use-anonymous-inode yes ++EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" ++TEST mkdir -p $M0/d1/b $M0/d2/a ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST mv $M0/d2/a $M0/d1 ++TEST mv $M0/d1/b $M0/d2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) ++TEST [[ -d $B0/${V0}1/$anon_inode_name ]] ++TEST [[ -d $B0/${V0}2/$anon_inode_name ]] ++anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) ++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name ++EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name ++ ++TEST ! ls $M0/$anon_inode_name ++EXPECT "^4$" echo $(ls -a $M0 | wc -l) ++ ++#Test purging code path by shd ++TEST $CLI volume heal $V0 disable ++TEST mkdir $M0/l0 $M0/l1 $M0/l2 ++TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file ++TEST ln $M0/del-file $M0/del-file-link ++TEST ln $M0/l0/file $M0/l1/file-link1 ++TEST ln $M0/l0/file $M0/l2/file-link2 ++TEST mkdir -p $M0/del-recursive-dir/d1 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++TEST rm -f $M0/del-file $M0/del-file-nolink ++TEST rm -rf $M0/del-recursive-dir ++TEST mv $M0/d1/a $M0/d2 ++TEST mv $M0/l0/file $M0/l0/renamed-file ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 ++ ++nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) ++link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) ++dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) ++rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) ++rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) ++TEST ! stat $M0/del-file ++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid ++TEST ! stat $M0/del-file-nolink ++TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid ++TEST ! stat $M0/del-recursive-dir ++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid ++TEST ! stat $M0/d1/a ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid ++TEST ! stat $M0/l0/file ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid ++ ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 ++TEST ! stat $M0/l1/file-link1 ++TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid ++ ++TEST kill_brick $V0 $H0 $B0/${V0}2 ++TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 ++TEST ! 
stat $M0/l2/file-link2 ++TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid ++ ++#Simulate only anon-inodes present in all bricks ++TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 ++ ++#Test that shd doesn't cleanup anon-inodes when some bricks are down ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST $CLI volume heal $V0 enable ++$CLI volume heal $V0 ++sleep 5 #Allow time for completion of one scan ++TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid ++TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid ++TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid ++rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) ++ ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 ++ ++#Test that rename indeed happened instead of rmdir/mkdir ++renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) ++EXPECT "$rename_dir_inum" echo $renamed_dir_inum ++cleanup; +diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t +new file mode 100644 +index 0000000..0803a08 +--- /dev/null ++++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t +@@ -0,0 +1,464 @@ ++#!/bin/bash ++ ++#This file checks if missing entry self-heal and entry self-heal are working ++#as expected. ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. $(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++function get_file_type { ++ stat -c "%a:%F:%g:%t:%T:%u" $1 ++} ++ ++function diff_dirs { ++ diff <(ls $1 | sort) <(ls $2 | sort) ++} ++ ++function heal_status { ++ local f1_path="${1}/${3}" ++ local f2_path="${2}/${3}" ++ local insync="" ++ diff_dirs $f1_path $f2_path ++ if [ $? 
-eq 0 ]; ++ then ++ insync="Y" ++ else ++ insync="N" ++ fi ++ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) ++ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) ++ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) ++ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) ++ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) ++ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) ++ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi ++ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi ++ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi ++ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi ++ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi ++ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi ++ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} ++} ++ ++function is_heal_done { ++ local zero_xattr="000000000000000000000000" ++ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; ++ then ++ echo "Y" ++ else ++ echo "N" ++ fi ++} ++ ++function print_pending_heals { ++ local result=":" ++ for i in "$@"; ++ do ++ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; ++ then ++ result="$result:$i" ++ fi ++ done ++#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names ++ if [ $result == ":" ]; then result="~"; fi ++ echo $result ++} ++ ++zero_xattr="000000000000000000000000" ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} ++TEST $CLI volume heal $V0 disable ++TEST $CLI volume set $V0 cluster.use-anonymous-inode off ++TEST $CLI volume set $V0 performance.write-behind off ++TEST $CLI volume set $V0 performance.read-ahead off ++TEST $CLI volume set $V0 performance.readdir-ahead off ++TEST $CLI volume set $V0 performance.open-behind off ++TEST $CLI volume set $V0 performance.stat-prefetch off ++TEST $CLI volume set $V0 performance.io-cache off ++TEST $CLI volume set $V0 performance.quick-read off ++TEST $CLI volume set $V0 cluster.data-self-heal on ++TEST $CLI volume set $V0 cluster.metadata-self-heal on ++TEST $CLI volume set $V0 cluster.entry-self-heal on ++TEST $CLI volume start $V0 ++ ++TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 ++cd $M0 ++#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens ++#spb is split-brain, fool is all fool ++ ++#source_self_accusing means there exists source and a sink which self-accuses. ++#This simulates failures where fops failed on the bricks without it going down. 
++#Something like EACCESS/EDQUOT etc ++ ++TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing ++TEST mkfifo source_deletions_heal/fifo ++TEST mknod source_deletions_heal/block b 4 5 ++TEST mknod source_deletions_heal/char c 1 5 ++TEST touch source_deletions_heal/file ++TEST ln -s source_deletions_heal/file source_deletions_heal/slink ++TEST mkdir source_deletions_heal/dir1 ++TEST mkdir source_deletions_heal/dir1/dir2 ++ ++TEST mkfifo source_deletions_me/fifo ++TEST mknod source_deletions_me/block b 4 5 ++TEST mknod source_deletions_me/char c 1 5 ++TEST touch source_deletions_me/file ++TEST ln -s source_deletions_me/file source_deletions_me/slink ++TEST mkdir source_deletions_me/dir1 ++TEST mkdir source_deletions_me/dir1/dir2 ++ ++TEST mkfifo source_self_accusing/fifo ++TEST mknod source_self_accusing/block b 4 5 ++TEST mknod source_self_accusing/char c 1 5 ++TEST touch source_self_accusing/file ++TEST ln -s source_self_accusing/file source_self_accusing/slink ++TEST mkdir source_self_accusing/dir1 ++TEST mkdir source_self_accusing/dir1/dir2 ++ ++TEST kill_brick $V0 $H0 $B0/${V0}0 ++ ++TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 ++TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 ++TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 ++TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 ++ ++#Test that the files are deleted ++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}1/source_deletions_heal/block ++TEST ! stat $B0/${V0}1/source_deletions_heal/char ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/slink ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_me/fifo ++TEST ! stat $B0/${V0}1/source_deletions_me/block ++TEST ! stat $B0/${V0}1/source_deletions_me/char ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/slink ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1 ++TEST ! stat $B0/${V0}1/source_self_accusing/fifo ++TEST ! stat $B0/${V0}1/source_self_accusing/block ++TEST ! stat $B0/${V0}1/source_self_accusing/char ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/slink ++TEST ! 
stat $B0/${V0}1/source_self_accusing/dir1 ++ ++ ++TEST mkfifo source_creations_heal/fifo ++TEST mknod source_creations_heal/block b 4 5 ++TEST mknod source_creations_heal/char c 1 5 ++TEST touch source_creations_heal/file ++TEST ln -s source_creations_heal/file source_creations_heal/slink ++TEST mkdir source_creations_heal/dir1 ++TEST mkdir source_creations_heal/dir1/dir2 ++ ++TEST mkfifo source_creations_me/fifo ++TEST mknod source_creations_me/block b 4 5 ++TEST mknod source_creations_me/char c 1 5 ++TEST touch source_creations_me/file ++TEST ln -s source_creations_me/file source_creations_me/slink ++TEST mkdir source_creations_me/dir1 ++TEST mkdir source_creations_me/dir1/dir2 ++ ++$CLI volume stop $V0 ++ ++#simulate fool fool scenario for fool_* dirs ++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} ++ ++#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty ++setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} ++setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} ++ ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++ ++TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 ++ ++$CLI volume stop $V0 ++ ++#simulate fool fool scenario for fool_* dirs ++setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} ++setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} ++setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} ++ ++#simulate self-accusing for source_self_accusing ++TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing ++ ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++# Check if conservative merges happened correctly on _me_ dirs ++TEST stat spb_me_heal/1 ++TEST stat $B0/${V0}0/spb_me_heal/1 ++TEST stat $B0/${V0}1/spb_me_heal/1 ++ ++TEST stat spb_me_heal/0 ++TEST stat $B0/${V0}0/spb_me_heal/0 ++TEST stat $B0/${V0}1/spb_me_heal/0 ++ ++TEST stat fool_me/1 ++TEST stat $B0/${V0}0/fool_me/1 ++TEST stat $B0/${V0}1/fool_me/1 ++ ++TEST stat fool_me/0 ++TEST stat $B0/${V0}0/fool_me/0 ++TEST stat $B0/${V0}1/fool_me/0 ++ ++TEST stat v1_fool_me/0 ++TEST stat $B0/${V0}0/v1_fool_me/0 ++TEST stat $B0/${V0}1/v1_fool_me/0 ++ ++TEST stat v1_fool_me/1 ++TEST stat $B0/${V0}0/v1_fool_me/1 ++TEST stat $B0/${V0}1/v1_fool_me/1 ++ ++TEST stat v1_dirty_me/0 ++TEST stat $B0/${V0}0/v1_dirty_me/0 ++TEST stat $B0/${V0}1/v1_dirty_me/0 ++ ++#Check if files that have gfid-mismatches in _me_ are giving EIO ++TEST ! stat spb_me/0 ++ ++#Check if stale files are deleted on access ++TEST ! stat source_deletions_me/fifo ++TEST ! stat $B0/${V0}0/source_deletions_me/fifo ++TEST ! stat $B0/${V0}1/source_deletions_me/fifo ++TEST ! stat source_deletions_me/block ++TEST ! stat $B0/${V0}0/source_deletions_me/block ++TEST ! stat $B0/${V0}1/source_deletions_me/block ++TEST ! stat source_deletions_me/char ++TEST ! stat $B0/${V0}0/source_deletions_me/char ++TEST ! 
stat $B0/${V0}1/source_deletions_me/char ++TEST ! stat source_deletions_me/file ++TEST ! stat $B0/${V0}0/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat source_deletions_me/file ++TEST ! stat $B0/${V0}0/source_deletions_me/file ++TEST ! stat $B0/${V0}1/source_deletions_me/file ++TEST ! stat source_deletions_me/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 ++TEST ! stat source_deletions_me/dir1 ++TEST ! stat $B0/${V0}0/source_deletions_me/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_me/dir1 ++ ++#Test if the files created as part of access are healed correctly ++r=$(get_file_type source_creations_me/fifo) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo ++TEST [ -p source_creations_me/fifo ] ++ ++r=$(get_file_type source_creations_me/block) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block ++TEST [ -b source_creations_me/block ] ++ ++r=$(get_file_type source_creations_me/char) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char ++TEST [ -c source_creations_me/char ] ++ ++r=$(get_file_type source_creations_me/file) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file ++TEST [ -f source_creations_me/file ] ++ ++r=$(get_file_type source_creations_me/slink) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink ++TEST [ -h source_creations_me/slink ] ++ ++r=$(get_file_type source_creations_me/dir1/dir2) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 ++TEST [ -d source_creations_me/dir1/dir2 ] ++ ++r=$(get_file_type source_creations_me/dir1) ++EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 ++TEST [ -d source_creations_me/dir1 ] ++ ++#Trigger heal and check _heal dirs are healed properly ++#Trigger change in event generation number. 
That way inodes would get refreshed during lookup ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++$CLI volume start $V0 force ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++TEST stat spb_heal ++TEST stat spb_me_heal ++TEST stat fool_heal ++TEST stat fool_me ++TEST stat v1_fool_heal ++TEST stat v1_fool_me ++TEST stat source_deletions_heal ++TEST stat source_deletions_me ++TEST stat source_self_accusing ++TEST stat source_creations_heal ++TEST stat source_creations_me ++TEST stat v1_dirty_heal ++TEST stat v1_dirty_me ++TEST $CLI volume stop $V0 ++TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* ++ ++$CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++#Create base entry in indices/xattrop ++echo "Data" > $M0/FILE ++rm -f $M0/FILE ++EXPECT "1" count_index_entries $B0/${V0}0 ++EXPECT "1" count_index_entries $B0/${V0}1 ++ ++TEST $CLI volume stop $V0; ++ ++#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal ++create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 ++ ++$CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++ ++$CLI volume heal $V0 enable ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++ ++TEST $CLI volume heal $V0; ++EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing ++ ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me ++EXPECT 
"Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal ++EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me ++ ++#Don't access the files/dirs from mount point as that may cause self-heals ++# Check if conservative merges happened correctly on heal dirs ++TEST stat $B0/${V0}0/spb_heal/1 ++TEST stat $B0/${V0}1/spb_heal/1 ++ ++TEST stat $B0/${V0}0/spb_heal/0 ++TEST stat $B0/${V0}1/spb_heal/0 ++ ++TEST stat $B0/${V0}0/fool_heal/1 ++TEST stat $B0/${V0}1/fool_heal/1 ++ ++TEST stat $B0/${V0}0/fool_heal/0 ++TEST stat $B0/${V0}1/fool_heal/0 ++ ++TEST stat $B0/${V0}0/v1_fool_heal/0 ++TEST stat $B0/${V0}1/v1_fool_heal/0 ++ ++TEST stat $B0/${V0}0/v1_fool_heal/1 ++TEST stat $B0/${V0}1/v1_fool_heal/1 ++ ++TEST stat $B0/${V0}0/v1_dirty_heal/0 ++TEST stat $B0/${V0}1/v1_dirty_heal/0 ++ ++#Check if files that have gfid-mismatches in spb are giving EIO ++TEST ! stat spb/0 ++ ++#Check if stale files are deleted on access ++TEST ! stat $B0/${V0}0/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}1/source_deletions_heal/fifo ++TEST ! stat $B0/${V0}0/source_deletions_heal/block ++TEST ! stat $B0/${V0}1/source_deletions_heal/block ++TEST ! stat $B0/${V0}0/source_deletions_heal/char ++TEST ! stat $B0/${V0}1/source_deletions_heal/char ++TEST ! stat $B0/${V0}0/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}0/source_deletions_heal/file ++TEST ! stat $B0/${V0}1/source_deletions_heal/file ++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 ++TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 ++ ++#Check if stale files are deleted on access ++TEST ! stat $B0/${V0}0/source_self_accusing/fifo ++TEST ! stat $B0/${V0}1/source_self_accusing/fifo ++TEST ! stat $B0/${V0}0/source_self_accusing/block ++TEST ! stat $B0/${V0}1/source_self_accusing/block ++TEST ! stat $B0/${V0}0/source_self_accusing/char ++TEST ! stat $B0/${V0}1/source_self_accusing/char ++TEST ! stat $B0/${V0}0/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}0/source_self_accusing/file ++TEST ! stat $B0/${V0}1/source_self_accusing/file ++TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 ++TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2 ++TEST ! stat $B0/${V0}0/source_self_accusing/dir1 ++TEST ! 
stat $B0/${V0}1/source_self_accusing/dir1 ++ ++#Test if the files created as part of full self-heal are healed correctly ++r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo ++TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block ++EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/block) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/char) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char ++EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/file) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file ++TEST [ -f $B0/${V0}0/source_creations_heal/file ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/slink) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/slink ++TEST [ -h $B0/${V0}0/source_creations_heal/slink ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 ++TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] ++ ++r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) ++EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 ++TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] ++ ++cd - ++ ++#Anonymous directory shouldn't be created ++TEST mkdir $M0/rename-dir ++before_rename=$(STAT_INO $B0/${V0}1/rename-dir) ++TEST kill_brick $V0 $H0 $B0/${V0}1 ++TEST mv $M0/rename-dir $M0/new-name ++TEST $CLI volume start $V0 force ++#Since features.ctime is not enabled by default in downstream, the below test ++#will fail. If ctime feature is enabled, there will be trusted.glusterfs.mdata ++#xattr set which will differ for the parent in the gfid split-brain scenario ++#and when lookup is triggered, the gfid gets added to indices/xattrop leading ++#the below test to pass in upstream. Hence commenting it here. ++#'spb' is in split-brain so pending-heal-count will be 2 ++#EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 ++after_rename=$(STAT_INO $B0/${V0}1/new-name) ++EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) ++EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) ++EXPECT_NOT "$before_rename" echo $after_rename ++cleanup +diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t +new file mode 100644 +index 0000000..256ee2a +--- /dev/null ++++ b/tests/basic/afr/rename-data-loss.t +@@ -0,0 +1,72 @@ ++#!/bin/bash ++#Self-heal tests ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++. 
$(dirname $0)/../../afr.rc ++ ++cleanup; ++ ++TEST glusterd ++TEST pidof glusterd ++TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} ++TEST $CLI volume set $V0 write-behind off ++TEST $CLI volume set $V0 self-heal-daemon off ++TEST $CLI volume set $V0 data-self-heal off ++TEST $CLI volume set $V0 metadata-self-heal off ++TEST $CLI volume set $V0 entry-self-heal off ++TEST $CLI volume start $V0 ++EXPECT 'Started' volinfo_field $V0 'Status' ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++ ++cd $M0 ++TEST `echo "line1" >> file1` ++TEST mkdir dir1 ++TEST mkdir dir2 ++TEST mkdir -p dir1/dira/dirb ++TEST `echo "line1">>dir1/dira/dirb/file1` ++TEST mkdir delete_me ++TEST `echo "line1" >> delete_me/file1` ++ ++#brick0 has witnessed the second write while brick1 is down. ++TEST kill_brick $V0 $H0 $B0/brick1 ++TEST `echo "line2" >> file1` ++TEST `echo "line2" >> dir1/dira/dirb/file1` ++TEST `echo "line2" >> delete_me/file1` ++ ++#Toggle the bricks that are up/down. ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 ++TEST kill_brick $V0 $H0 $B0/brick0 ++ ++#Rename when the 'source' brick0 for data-selfheals is down. ++mv file1 file2 ++mv dir1/dira dir2 ++ ++#Delete a dir when brick0 is down. ++rm -rf delete_me ++cd - ++ ++#Bring everything up and trigger heal ++TEST $CLI volume set $V0 self-heal-daemon on ++TEST $CLI volume start $V0 force ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 ++TEST $CLI volume heal $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 ++EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 ++ ++#Remount to avoid reading from caches ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; ++EXPECT "line2" tail -1 $M0/file2 ++EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 ++TEST ! stat $M0/delete_me/file1 ++TEST ! stat $M0/delete_me ++ ++anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) ++TEST [[ -d $B0/brick0/$anon_inode_name ]] ++TEST [[ -d $B0/brick1/$anon_inode_name ]] ++cleanup +diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t +index c208112..0115350 100644 +--- a/tests/bugs/replicate/bug-1744548-heal-timeout.t ++++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t +@@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0 + TEST $CLI volume profile $V0 start + TEST $CLI volume profile $V0 info clear + TEST $CLI volume heal $V0 enable +-# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +-EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count ++# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode ++EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count + + # Check that a change in heal-timeout is honoured immediately. + TEST $CLI volume set $V0 cluster.heal-timeout 5 + sleep 10 + # Two crawls must have happened. +-EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count ++EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count + + # shd must not heal if it is disabled and heal-timeout is changed. 
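+ # (A note on the digit strings above, assuming get_cumulative_opendir_count
+ # concatenates the per-brick opendir totals from profile info: three bricks
+ # doing 4 opendirs each gives "444", and two further crawls take each brick
+ # to 12, hence "121212".)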
+ TEST $CLI volume heal $V0 disable +diff --git a/tests/features/trash.t b/tests/features/trash.t +index 472e909..da5b50b 100755 +--- a/tests/features/trash.t ++++ b/tests/features/trash.t +@@ -94,105 +94,105 @@ wildcard_not_exists() { + if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi + } + +-# testing glusterd [1-3] ++# testing glusterd + TEST glusterd + TEST pidof glusterd + TEST $CLI volume info + +-# creating distributed volume [4] ++# creating distributed volume + TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} + +-# checking volume status [5-7] ++# checking volume status + EXPECT "$V0" volinfo_field $V0 'Volume Name' + EXPECT 'Created' volinfo_field $V0 'Status' + EXPECT '2' brick_count $V0 + +-# test without enabling trash translator [8] ++# test without enabling trash translator + TEST start_vol $V0 $M0 + +-# test on enabling trash translator [9-10] ++# test on enabling trash translator + TEST $CLI volume set $V0 features.trash on + EXPECT 'on' volinfo_field $V0 'features.trash' + +-# files directly under mount point [11] ++# files directly under mount point + create_files $M0/file1 $M0/file2 + TEST file_exists $V0 file1 file2 + +-# perform unlink [12] ++# perform unlink + TEST unlink_op file1 + +-# perform truncate [13] ++# perform truncate + TEST truncate_op file2 4 + +-# create files directory hierarchy and check [14] ++# create files directory hierarchy and check + mkdir -p $M0/1/2/3 + create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2 + TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2 + +-# perform unlink [15] ++# perform unlink + TEST unlink_op 1/2/3/foo1 + +-# perform truncate [16] ++# perform truncate + TEST truncate_op 1/2/3/foo2 4 + + # create a directory for eliminate pattern + mkdir $M0/a + +-# set the eliminate pattern [17-18] ++# set the eliminate pattern + TEST $CLI volume set $V0 features.trash-eliminate-path /a + EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path' + +-# create two files and check [19] ++# create two files and check + create_files $M0/a/test1 $M0/a/test2 + TEST file_exists $V0 a/test1 a/test2 + +-# remove from eliminate pattern [20] ++# remove from eliminate pattern + rm -f $M0/a/test1 + EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1* + +-# truncate from eliminate path [21-23] ++# truncate from eliminate path + truncate -s 2 $M0/a/test2 + TEST [ -e $M0/a/test2 ] + TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ] + EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2* + +-# set internal op on [24-25] ++# set internal op on + TEST $CLI volume set $V0 features.trash-internal-op on + EXPECT 'on' volinfo_field $V0 'features.trash-internal-op' + +-# again create two files and check [26] ++# again create two files and check + create_files $M0/inop1 $M0/inop2 + TEST file_exists $V0 inop1 inop2 + +-# perform unlink [27] ++# perform unlink + TEST unlink_op inop1 + +-# perform truncate [28] ++# perform truncate + TEST truncate_op inop2 4 + +-# remove one brick and restart the volume [28-31] ++# remove one brick and restart the volume + TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force + EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + TEST $CLI volume stop $V0 + TEST start_vol $V0 $M0 $M0/.trashcan + +-# again create two files and check [33] ++# again create two files and check + create_files $M0/rebal1 $M0/rebal2 + TEST file_exists $V0 rebal1 rebal2 + +-# add one brick [34-35] ++# add one brick + TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 + TEST [ -d $B0/${V0}3 ] + + +-# perform rebalance [36] ++# perform rebalance + TEST $CLI volume 
rebalance $V0 start force + EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed + + #Find out which file was migrated to the new brick + file_name=$(ls $B0/${V0}3/rebal*| xargs basename) + +-# check whether rebalance was succesful [37-40] ++# check whether rebalance was successful + EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name* + EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name* + +@@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + # force required in case rebalance is not over + TEST $CLI volume stop $V0 force + +-# create a replicated volume [41] ++# create a replicated volume + TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2} + +-# checking volume status [42-45] ++# checking volume status + EXPECT "$V1" volinfo_field $V1 'Volume Name' + EXPECT 'Replicate' volinfo_field $V1 'Type' + EXPECT 'Created' volinfo_field $V1 'Status' + EXPECT '2' brick_count $V1 + +-# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50] ++# enable trash with options and start the replicate volume by disabling automatic self-heal + TEST $CLI volume set $V1 features.trash on + TEST $CLI volume set $V1 features.trash-internal-op on + EXPECT 'on' volinfo_field $V1 'features.trash' + EXPECT 'on' volinfo_field $V1 'features.trash-internal-op' + TEST start_vol $V1 $M1 $M1/.trashcan + +-# mount and check for trash directory [51] ++# mount and check for trash directory + TEST [ -d $M1/.trashcan/internal_op ] + +-# create a file and check [52] ++# create a file and check + touch $M1/self + TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ] + +-# kill one brick and delete the file from mount point [53-54] ++# kill one brick and delete the file from mount point + kill_brick $V1 $H0 $B0/${V1}1 + EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count + rm -f $M1/self + EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self* + +-# force start the volume and trigger the self-heal manually [55-57] +-TEST $CLI volume start $V1 force +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count +-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +-# Since we created the file under root of the volume, it will be +-# healed automatically +- +-# check for the removed file in trashcan [58] +-EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self* +- +-# check renaming of trash directory through cli [59-62] ++# check renaming of trash directory through cli + TEST $CLI volume set $V0 trash-dir abc + TEST start_vol $V0 $M0 $M0/abc + TEST [ -e $M0/abc -a ! 
-e $M0/.trashcan ] + EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal* + +-# ensure that rename and delete operation on trash directory fails [63-65] ++# ensure that rename and delete operation on trash directory fails + rm -rf $M0/abc/internal_op + TEST [ -e $M0/abc/internal_op ] + rm -rf $M0/abc/ +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 90b4f14..6f2da11 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -47,6 +47,41 @@ afr_quorum_errno(afr_private_t *priv) + return ENOTCONN; + } + ++gf_boolean_t ++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, ++ pid_t pid) ++{ ++ if (!__is_root_gfid(pargfid)) { ++ return _gf_false; ++ } ++ ++ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { ++ /*For backward compatibility /.landfill is private*/ ++ return _gf_true; ++ } ++ ++ if (pid == GF_CLIENT_PID_GSYNCD) { ++ /*geo-rep needs to create/sync private directory on slave because ++ * it appears in changelog*/ ++ return _gf_false; ++ } ++ ++ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { ++ if (strcmp(name, priv->anon_inode_name) == 0) { ++ /* anonymous-inode dir is private*/ ++ return _gf_true; ++ } ++ } else { ++ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == ++ 0) { ++ /* anonymous-inode dir prefix is private for geo-rep to work*/ ++ return _gf_true; ++ } ++ } ++ ++ return _gf_false; ++} ++ + int + afr_fav_child_reset_sink_xattrs(void *opaque); + +@@ -3301,11 +3336,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + return 0; + } + +- if (__is_root_gfid(loc->parent->gfid)) { +- if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { +- op_errno = EPERM; +- goto out; +- } ++ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, ++ frame->root->pid)) { ++ op_errno = EPERM; ++ goto out; + } + + local = AFR_FRAME_INIT(frame, op_errno); +@@ -4832,6 +4866,7 @@ afr_priv_dump(xlator_t *this) + priv->background_self_heal_count); + gf_proc_dump_write("healers", "%d", priv->healers); + gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); ++ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); + if (priv->quorum_count == AFR_QUORUM_AUTO) { + gf_proc_dump_write("quorum-type", "auto"); + } else if (priv->quorum_count == 0) { +@@ -5792,6 +5827,7 @@ afr_priv_destroy(afr_private_t *priv) + GF_FREE(priv->local); + GF_FREE(priv->pending_key); + GF_FREE(priv->children); ++ GF_FREE(priv->anon_inode); + GF_FREE(priv->child_up); + GF_FREE(priv->child_latency); + LOCK_DESTROY(&priv->lock); +diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c +index 6307b63..d64b6a9 100644 +--- a/xlators/cluster/afr/src/afr-dir-read.c ++++ b/xlators/cluster/afr/src/afr-dir-read.c +@@ -158,8 +158,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) + } + + static void +-afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, +- gf_dirent_t *entries, fd_t *fd) ++afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, ++ int subvol, gf_dirent_t *entries, fd_t *fd) + { + int ret = -1; + gf_dirent_t *entry = NULL; +@@ -177,8 +177,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, + + list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) + { +- if (__is_root_gfid(fd->inode->gfid) && +- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { ++ 
if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, ++ frame->root->pid)) { + continue; + } + +@@ -222,8 +222,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + } + + if (op_ret >= 0) +- afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, +- local->fd); ++ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, ++ &entries, local->fd); + + AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 9b6575f..0a8a7fd 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -2753,3 +2753,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, + out: + return source; + } ++ ++static int ++afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, inode_t *inode, ++ struct iatt *buf, struct iatt *preparent, ++ struct iatt *postparent, dict_t *xdata) ++{ ++ afr_local_t *local = frame->local; ++ int i = (long)cookie; ++ ++ local->replies[i].valid = 1; ++ local->replies[i].op_ret = op_ret; ++ local->replies[i].op_errno = op_errno; ++ if (op_ret == 0) { ++ local->op_ret = 0; ++ local->replies[i].poststat = *buf; ++ local->replies[i].preparent = *preparent; ++ local->replies[i].postparent = *postparent; ++ } ++ if (xdata) { ++ local->replies[i].xdata = dict_ref(xdata); ++ } ++ ++ syncbarrier_wake(&local->barrier); ++ return 0; ++} ++ ++int ++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) ++{ ++ call_frame_t *frame = NULL; ++ afr_local_t *local = NULL; ++ afr_private_t *priv = this->private; ++ unsigned char *mkdir_on = alloca0(priv->child_count); ++ unsigned char *lookup_on = alloca0(priv->child_count); ++ loc_t loc = {0}; ++ int32_t op_errno = 0; ++ int32_t child_op_errno = 0; ++ struct iatt iatt = {0}; ++ dict_t *xdata = NULL; ++ uuid_t anon_inode_gfid = {0}; ++ int mkdir_count = 0; ++ int i = 0; ++ ++ /*Try to mkdir everywhere and return success if the dir exists on 'child' ++ */ ++ ++ if (!priv->use_anon_inode) { ++ op_errno = EINVAL; ++ goto out; ++ } ++ ++ frame = afr_frame_create(this, &op_errno); ++ if (op_errno) { ++ goto out; ++ } ++ local = frame->local; ++ if (!local->child_up[child]) { ++ /*Other bricks may need mkdir so don't error out yet*/ ++ child_op_errno = ENOTCONN; ++ } ++ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); ++ for (i = 0; i < priv->child_count; i++) { ++ if (!local->child_up[i]) ++ continue; ++ ++ if (priv->anon_inode[i]) { ++ mkdir_on[i] = 0; ++ } else { ++ mkdir_on[i] = 1; ++ mkdir_count++; ++ } ++ } ++ ++ if (mkdir_count == 0) { ++ *linked_inode = inode_find(this->itable, anon_inode_gfid); ++ if (*linked_inode) { ++ op_errno = 0; ++ goto out; ++ } ++ } ++ ++ loc.parent = inode_ref(this->itable->root); ++ loc.name = priv->anon_inode_name; ++ loc.inode = inode_new(this->itable); ++ if (!loc.inode) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ xdata = dict_new(); ++ if (!xdata) { ++ op_errno = ENOMEM; ++ goto out; ++ } ++ ++ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); ++ if (op_errno) { ++ goto out; ++ } ++ ++ if (mkdir_count == 0) { ++ memcpy(lookup_on, local->child_up, priv->child_count); ++ goto lookup; ++ } ++ ++ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, ++ xdata); ++ ++ for (i = 0; i < priv->child_count; i++) { ++ if (!mkdir_on[i]) { ++ 
continue; ++ } ++ ++ if (local->replies[i].op_ret == 0) { ++ priv->anon_inode[i] = 1; ++ iatt = local->replies[i].poststat; ++ } else if (local->replies[i].op_ret < 0 && ++ local->replies[i].op_errno == EEXIST) { ++ lookup_on[i] = 1; ++ } else if (i == child) { ++ child_op_errno = local->replies[i].op_errno; ++ } ++ } ++ ++ if (AFR_COUNT(lookup_on, priv->child_count) == 0) { ++ goto link; ++ } ++ ++lookup: ++ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ xdata); ++ for (i = 0; i < priv->child_count; i++) { ++ if (!lookup_on[i]) { ++ continue; ++ } ++ ++ if (local->replies[i].op_ret == 0) { ++ if (gf_uuid_compare(anon_inode_gfid, ++ local->replies[i].poststat.ia_gfid) == 0) { ++ priv->anon_inode[i] = 1; ++ iatt = local->replies[i].poststat; ++ } else { ++ if (i == child) ++ child_op_errno = EINVAL; ++ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, ++ "%s has gfid: %s", priv->anon_inode_name, ++ uuid_utoa(local->replies[i].poststat.ia_gfid)); ++ } ++ } else if (i == child) { ++ child_op_errno = local->replies[i].op_errno; ++ } ++ } ++link: ++ if (!gf_uuid_is_null(iatt.ia_gfid)) { ++ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); ++ if (*linked_inode) { ++ op_errno = 0; ++ inode_lookup(*linked_inode); ++ } else { ++ op_errno = ENOMEM; ++ } ++ goto out; ++ } ++ ++out: ++ if (xdata) ++ dict_unref(xdata); ++ loc_wipe(&loc); ++ /*child_op_errno takes precedence*/ ++ if (child_op_errno == 0) { ++ child_op_errno = op_errno; ++ } ++ ++ if (child_op_errno && *linked_inode) { ++ inode_unref(*linked_inode); ++ *linked_inode = NULL; ++ } ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ return -child_op_errno; ++} +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 00b5b2d..20b07dd 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -16,54 +16,170 @@ + #include <glusterfs/syncop-utils.h> + #include <glusterfs/events.h> + +-static int +-afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, +- inode_t *inode, int child, struct afr_reply *replies) ++int ++afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, ++ struct afr_reply *replies, ++ gf_boolean_t *anon_inode) + { + afr_private_t *priv = NULL; ++ afr_local_t *local = NULL; + xlator_t *subvol = NULL; + int ret = 0; ++ int i = 0; ++ char g[64] = {0}; ++ unsigned char *lookup_success = NULL; ++ call_frame_t *frame = NULL; ++ loc_t loc2 = { ++ 0, ++ }; + loc_t loc = { + 0, + }; +- char g[64]; + + priv = this->private; +- + subvol = priv->children[child]; ++ lookup_success = alloca0(priv->child_count); ++ uuid_utoa_r(replies[child].poststat.ia_gfid, g); ++ loc.inode = inode_new(inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if (replies[child].poststat.ia_type == IA_IFDIR) { ++ /* This directory may have sub-directory hierarchy which may need to ++ * be preserved for subsequent heals. 
So unconditionally move the ++ * directory to anonymous-inode directory*/ ++ *anon_inode = _gf_true; ++ goto anon_inode; ++ } ++ ++ frame = afr_frame_create(this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ local = frame->local; ++ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ lookup_success[i] = 1; ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ ret = -local->replies[i].op_errno; ++ } ++ } ++ ++ if (priv->quorum_count) { ++ if (afr_has_quorum(lookup_success, this, NULL)) { ++ *anon_inode = _gf_true; ++ } ++ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { ++ *anon_inode = _gf_true; ++ } else if (ret) { ++ goto out; ++ } ++ ++anon_inode: ++ if (!*anon_inode) { ++ ret = 0; ++ goto out; ++ } + + loc.parent = inode_ref(dir); + gf_uuid_copy(loc.pargfid, dir->gfid); + loc.name = name; +- loc.inode = inode_ref(inode); + +- if (replies[child].valid && replies[child].op_ret == 0) { +- switch (replies[child].poststat.ia_type) { +- case IA_IFDIR: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), +- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), +- subvol->name); +- ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), +- name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), +- subvol->name); +- ret = syncop_unlink(subvol, &loc, NULL, NULL); +- break; +- } ++ ret = afr_anon_inode_create(this, child, &loc2.parent); ++ if (ret < 0) ++ goto out; ++ ++ loc2.name = g; ++ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); ++ if (ret < 0) { ++ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "Rename to %s dir %s/%s (%s) on %s failed", ++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, ++ subvol->name); ++ } else { ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "Rename to %s dir %s/%s (%s) on %s successful", ++ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, ++ subvol->name); + } + ++out: + loc_wipe(&loc); ++ loc_wipe(&loc2); ++ if (frame) { ++ AFR_STACK_DESTROY(frame); ++ } + + return ret; + } + + int ++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, struct afr_reply *replies) ++{ ++ char g[64] = {0}; ++ afr_private_t *priv = NULL; ++ xlator_t *subvol = NULL; ++ int ret = 0; ++ loc_t loc = { ++ 0, ++ }; ++ gf_boolean_t anon_inode = _gf_false; ++ ++ priv = this->private; ++ subvol = priv->children[child]; ++ ++ if ((!replies[child].valid) || (replies[child].op_ret < 0)) { ++ /*Nothing to do*/ ++ ret = 0; ++ goto out; ++ } ++ ++ if (priv->use_anon_inode) { ++ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, ++ replies, &anon_inode); ++ if (ret < 0 || anon_inode) ++ goto out; ++ } ++ ++ loc.parent = inode_ref(dir); ++ loc.inode = inode_new(inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ loc.name = name; ++ switch (replies[child].poststat.ia_type) { ++ case IA_IFDIR: ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, ++ 
uuid_utoa_r(replies[child].poststat.ia_gfid, g), ++ subvol->name); ++ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); ++ break; ++ default: ++ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), ++ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), ++ subvol->name); ++ ret = syncop_unlink(subvol, &loc, NULL, NULL); ++ break; ++ } ++ ++out: ++ loc_wipe(&loc); ++ return ret; ++} ++ ++int + afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + unsigned char *sources, inode_t *dir, + const char *name, inode_t *inode, +@@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + loc_t srcloc = { + 0, + }; ++ loc_t anonloc = { ++ 0, ++ }; + xlator_t *this = frame->this; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; +@@ -86,15 +205,18 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + 0, + }; + unsigned char *newentry = NULL; ++ char iatt_uuid_str[64] = {0}; ++ char dir_uuid_str[64] = {0}; + + priv = this->private; + iatt = &replies[source].poststat; ++ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); + if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, + "Invalid ia_type (%d) or gfid(%s). source brick=%d, " + "pargfid=%s, name=%s", +- iatt->ia_type, uuid_utoa(iatt->ia_gfid), source, +- uuid_utoa(dir->gfid), name); ++ iatt->ia_type, iatt_uuid_str, source, ++ uuid_utoa_r(dir->gfid, dir_uuid_str), name); + ret = -EINVAL; + goto out; + } +@@ -119,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, + + srcloc.inode = inode_ref(inode); + gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); +- if (iatt->ia_type != IA_IFDIR) +- ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); +- if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { ++ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); ++ if (ret == -ENOENT || ret == -ESTALE) { + newentry[dst] = 1; + ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, + sources, newentry); + if (ret) + goto out; ++ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { ++ // Try rename from hidden directory ++ ret = afr_anon_inode_create(this, dst, &anonloc.parent); ++ if (ret < 0) ++ goto out; ++ anonloc.inode = inode_ref(inode); ++ anonloc.name = iatt_uuid_str; ++ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); ++ if (ret == -ENOENT || ret == -ESTALE) ++ ret = -1; /*This sets 'mismatch' to true*/ ++ goto out; + } + + mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); +@@ -165,6 +297,7 @@ out: + GF_FREE(linkname); + loc_wipe(&loc); + loc_wipe(&srcloc); ++ loc_wipe(&anonloc); + return ret; + } + +@@ -580,6 +713,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + + priv = this->private; + ++ if (afr_is_private_directory(priv, fd->inode->gfid, name, ++ GF_CLIENT_PID_SELF_HEALD)) { ++ return 0; ++ } ++ + xattr = dict_new(); + if (!xattr) + return -ENOMEM; +@@ -628,7 +766,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + replies); + + if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { +- ret = afr_shd_index_purge(subvol, parent_idx_inode, name, ++ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, + inode->ia_type); + /* Why is ret force-set to 0? 
We do not care about + * index purge failing for full heal as it is quite +@@ -758,10 +896,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + +- if (__is_root_gfid(fd->inode->gfid) && +- !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) +- continue; +- + ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, + loc.inode, subvol, + local->need_full_crawl); +@@ -824,7 +958,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, + /* The name indices under the pgfid index dir are guaranteed + * to be regular files. Hence the hardcoding. + */ +- afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + ret = 0; + goto out; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index dace071..51e3d8c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + const char *bname, inode_t *inode, + struct afr_reply *replies) + { +- loc_t loc = { +- 0, +- }; + int i = 0; + afr_private_t *priv = NULL; +- char g[64]; + int ret = 0; + + priv = this->private; + +- loc.parent = inode_ref(parent); +- gf_uuid_copy(loc.pargfid, pargfid); +- loc.name = bname; +- loc.inode = inode_ref(inode); +- + for (i = 0; i < priv->child_count; i++) { + if (!replies[i].valid) + continue; +@@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, + if (replies[i].op_ret) + continue; + +- switch (replies[i].poststat.ia_type) { +- case IA_IFDIR: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), +- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), +- priv->children[i]->name); +- +- ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); +- break; +- default: +- gf_msg(this->name, GF_LOG_WARNING, 0, +- AFR_MSG_EXPUNGING_FILE_OR_DIR, +- "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), +- bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), +- priv->children[i]->name); +- +- ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); +- break; +- } ++ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, ++ replies); + } + +- loc_wipe(&loc); +- + return ret; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index 8f6fb00..c8dc384 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -370,4 +370,9 @@ gf_boolean_t + afr_is_file_empty_on_all_children(afr_private_t *priv, + struct afr_reply *replies); + ++int ++afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, ++ inode_t *inode, int child, struct afr_reply *replies); ++int ++afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); + #endif /* !_AFR_SELFHEAL_H */ +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 95ac5f2..939a135 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -222,7 +222,7 @@ out: + } + + int +-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, ++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t 
type) + { + int ret = 0; +@@ -422,7 +422,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + ret = afr_shd_selfheal(healer, healer->subvol, gfid); + + if (ret == -ENOENT || ret == -ESTALE) +- afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); ++ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); + + if (ret == 2) + /* If bricks crashed in pre-op after creating indices/xattrop +@@ -798,6 +798,176 @@ afr_bricks_available_for_heal(afr_private_t *priv) + return _gf_true; + } + ++static int ++afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ++ void *data) ++{ ++ struct subvol_healer *healer = data; ++ afr_private_t *priv = healer->this->private; ++ call_frame_t *frame = NULL; ++ afr_local_t *local = NULL; ++ int ret = 0; ++ loc_t loc = {0}; ++ int count = 0; ++ int i = 0; ++ int op_errno = 0; ++ struct iatt *iatt = NULL; ++ gf_boolean_t multiple_links = _gf_false; ++ unsigned char *gfid_present = alloca0(priv->child_count); ++ unsigned char *entry_present = alloca0(priv->child_count); ++ char *type = "file"; ++ ++ frame = afr_frame_create(healer->this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ local = frame->local; ++ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { ++ gf_msg_debug(healer->this->name, 0, ++ "Not all bricks are up. Skipping " ++ "cleanup of %s on %s", ++ entry->d_name, subvol->name); ++ ret = 0; ++ goto out; ++ } ++ ++ loc.inode = inode_new(parent->inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ret = gf_uuid_parse(entry->d_name, loc.gfid); ++ if (ret) { ++ ret = 0; ++ goto out; ++ } ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, ++ NULL); ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ count++; ++ gfid_present[i] = 1; ++ iatt = &local->replies[i].poststat; ++ if (iatt->ia_type == IA_IFDIR) { ++ type = "dir"; ++ } ++ ++ if (i == healer->subvol) { ++ if (local->replies[i].poststat.ia_nlink > 1) { ++ multiple_links = _gf_true; ++ } ++ } ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ /*We don't have complete view. Skip the entry*/ ++ gf_msg_debug(healer->this->name, local->replies[i].op_errno, ++ "Skipping cleanup of %s on %s", entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ /*Inode is deleted from subvol*/ ++ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { ++ gf_msg(healer->this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, ++ priv->anon_inode_name, entry->d_name, subvol->name); ++ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, ++ iatt->ia_type); ++ if (ret == -ENOENT || ret == -ESTALE) ++ ret = 0; ++ } else if (count > 1) { ++ loc_wipe(&loc); ++ loc.parent = inode_ref(parent->inode); ++ loc.name = entry->d_name; ++ loc.inode = inode_new(parent->inode->table); ++ if (!loc.inode) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, ++ &loc, NULL); ++ count = 0; ++ for (i = 0; i < priv->child_count; i++) { ++ if (local->replies[i].op_ret == 0) { ++ count++; ++ entry_present[i] = 1; ++ iatt = &local->replies[i].poststat; ++ } else if (local->replies[i].op_errno != ENOENT && ++ local->replies[i].op_errno != ESTALE) { ++ /*We don't have complete view. 
Skip the entry*/ ++ gf_msg_debug(healer->this->name, local->replies[i].op_errno, ++ "Skipping cleanup of %s on %s", entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ for (i = 0; i < priv->child_count; i++) { ++ if (gfid_present[i] && !entry_present[i]) { ++ /*Entry is not anonymous on at least one subvol*/ ++ gf_msg_debug(healer->this->name, 0, ++ "Valid entry present on %s " ++ "Skipping cleanup of %s on %s", ++ priv->children[i]->name, entry->d_name, ++ subvol->name); ++ ret = 0; ++ goto out; ++ } ++ } ++ ++ gf_msg(healer->this->name, GF_LOG_WARNING, 0, ++ AFR_MSG_EXPUNGING_FILE_OR_DIR, ++ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, ++ entry->d_name); ++ ret = 0; ++ for (i = 0; i < priv->child_count; i++) { ++ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, ++ entry->d_name, iatt->ia_type); ++ if (op_errno != ENOENT && op_errno != ESTALE) { ++ ret |= -op_errno; ++ } ++ } ++ } ++ ++out: ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ loc_wipe(&loc); ++ return ret; ++} ++ ++static void ++afr_cleanup_anon_inode_dir(struct subvol_healer *healer) ++{ ++ int ret = 0; ++ call_frame_t *frame = NULL; ++ afr_private_t *priv = healer->this->private; ++ loc_t loc = {0}; ++ ++ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); ++ if (ret) ++ goto out; ++ ++ frame = afr_frame_create(healer->this, &ret); ++ if (!frame) { ++ ret = -ret; ++ goto out; ++ } ++ ++ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, ++ GF_CLIENT_PID_SELF_HEALD, healer, ++ afr_shd_anon_inode_cleaner, NULL, ++ priv->shd.max_threads, priv->shd.wait_qlength); ++out: ++ if (frame) ++ AFR_STACK_DESTROY(frame); ++ loc_wipe(&loc); ++ return; ++} ++ + void * + afr_shd_index_healer(void *data) + { +@@ -854,6 +1024,10 @@ afr_shd_index_healer(void *data) + sleep(1); + } while (ret > 0); + ++ if (ret == 0) { ++ afr_cleanup_anon_inode_dir(healer); ++ } ++ + if (pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { + afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, + pre_crawl_xdata); +diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h +index 1990539..acd567e 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.h ++++ b/xlators/cluster/afr/src/afr-self-heald.h +@@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, + char **path_p); + + int +-afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, ++afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, + ia_type_t type); + #endif /* !_AFR_SELF_HEALD_H */ +diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c +index bfa464f..33fe4d8 100644 +--- a/xlators/cluster/afr/src/afr.c ++++ b/xlators/cluster/afr/src/afr.c +@@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) + } + } + ++void ++afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) ++{ ++ char *volfile_id_str = NULL; ++ uuid_t anon_inode_gfid = {0}; ++ ++ /*If volume id is not present don't enable anything*/ ++ if (dict_get_str(options, "volume-id", &volfile_id_str)) ++ return; ++ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); ++ /*anon_inode_name is not supposed to change once assigned*/ ++ if (!priv->anon_inode_name[0]) { ++ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", ++ AFR_ANON_DIR_PREFIX, volfile_id_str); ++ gf_uuid_parse(volfile_id_str, anon_inode_gfid); ++ /*Flip a bit to make sure volfile-id and anon-gfid 
are not same*/ ++ anon_inode_gfid[0] ^= 1; ++ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); ++ } ++} ++ + int + reconfigure(xlator_t *this, dict_t *options) + { +@@ -287,6 +308,10 @@ reconfigure(xlator_t *this, dict_t *options) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; + ++ afr_handle_anon_inode_options(priv, options); ++ ++ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, ++ out); + if (priv->shd.enabled) { + if ((priv->shd.enabled != enabled_old) || + (timeout_old != priv->shd.timeout)) +@@ -535,7 +560,9 @@ init(xlator_t *this) + + GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); + GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); ++ afr_handle_anon_inode_options(priv, this->options); + ++ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); + if (priv->quorum_count != 0) + priv->consistent_io = _gf_false; + +@@ -547,13 +574,16 @@ init(xlator_t *this) + goto out; + } + ++ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, ++ gf_afr_mt_char); ++ + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + + priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count, + gf_afr_mt_child_latency_t); + +- if (!priv->child_up || !priv->child_latency) { ++ if (!priv->child_up || !priv->child_latency || !priv->anon_inode) { + ret = -ENOMEM; + goto out; + } +@@ -1218,6 +1248,14 @@ struct volume_options options[] = { + .tags = {"replicate"}, + .description = "This option exists only for backward compatibility " + "and configuring it doesn't have any effect"}, ++ {.key = {"use-anonymous-inode"}, ++ .type = GF_OPTION_TYPE_BOOL, ++ .default_value = "no", ++ .op_version = {GD_OP_VERSION_7_0}, ++ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, ++ .tags = {"replicate"}, ++ .description = "Setting this option heals directory renames efficiently"}, ++ + {.key = {NULL}}, + }; + +diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h +index 3a2b26d..6a9a763 100644 +--- a/xlators/cluster/afr/src/afr.h ++++ b/xlators/cluster/afr/src/afr.h +@@ -40,6 +40,8 @@ + #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify" + + #define AFR_HALO_MAX_LATENCY 99999 ++#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" ++ + + #define PFLAG_PENDING (1 << 0) + #define PFLAG_SBRAIN (1 << 1) +@@ -155,6 +157,7 @@ typedef struct _afr_private { + struct list_head ta_waitq; + struct list_head ta_onwireq; + ++ unsigned char *anon_inode; + unsigned char *child_up; + int64_t *child_latency; + unsigned char *local; +@@ -240,6 +243,11 @@ typedef struct _afr_private { + gf_boolean_t esh_granular; + gf_boolean_t consistent_io; + gf_boolean_t data_self_heal; /* on/off */ ++ gf_boolean_t use_anon_inode; ++ ++ /*For anon-inode handling */ ++ char anon_inode_name[NAME_MAX + 1]; ++ char anon_gfid_str[UUID_SIZE + 1]; + } afr_private_t; + + typedef enum { +@@ -1341,4 +1349,7 @@ afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + void + afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, + unsigned char *replies); ++gf_boolean_t ++afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, ++ pid_t pid); + #endif /* __AFR_H__ */ +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 094a71f..1920284 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -3867,6 +3867,38 @@ out: + } + + static int 
++set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, ++ int clusters) ++{ ++ xlator_t *xlator = NULL; ++ int i = 0; ++ int ret = -1; ++ glusterd_conf_t *conf = NULL; ++ xlator_t *this = NULL; ++ ++ this = THIS; ++ GF_VALIDATE_OR_GOTO("glusterd", this, out); ++ conf = this->private; ++ GF_VALIDATE_OR_GOTO(this->name, conf, out); ++ ++ if (conf->op_version < GD_OP_VERSION_7_1) ++ return 0; ++ xlator = first_of(graph); ++ ++ for (i = 0; i < clusters; i++) { ++ ret = xlator_set_fixed_option(xlator, "volume-id", ++ uuid_utoa(volinfo->volume_id)); ++ if (ret) ++ goto out; ++ ++ xlator = xlator->next; ++ } ++ ++out: ++ return ret; ++} ++ ++static int + volgen_graph_build_afr_clusters(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) + { +@@ -3906,6 +3938,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, + clusters = -1; + goto out; + } ++ ++ ret = set_volfile_id_option(graph, volinfo, clusters); ++ if (ret) { ++ clusters = -1; ++ goto out; ++ } ++ + if (!volinfo->arbiter_count) + goto out; + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +index 62acadf..c1ca190 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c +@@ -3789,4 +3789,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { + .voltype = "features/cloudsync", + .op_version = GD_OP_VERSION_7_0, + .flags = VOLOPT_FLAG_CLIENT_OPT}, ++ ++ {.key = "cluster.use-anonymous-inode", ++ .voltype = "cluster/replicate", ++ .op_version = GD_OP_VERSION_7_1, ++ .value = "yes", ++ .flags = VOLOPT_FLAG_CLIENT_OPT}, + {.key = NULL}}; +-- +1.8.3.1 + diff --git a/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch b/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch new file mode 100644 index 0000000..0f6249e --- /dev/null +++ b/SOURCES/0516-afr-return-EIO-for-gfid-split-brains.patch @@ -0,0 +1,338 @@ +From 8d24d891aade910b0bb86b27c25a8d2382e19ba0 Mon Sep 17 00:00:00 2001 +From: karthik-us <ksubrahm@redhat.com> +Date: Tue, 15 Dec 2020 15:04:19 +0530 +Subject: [PATCH 516/517] afr: return -EIO for gfid split-brains. + +Problem: +entry-self-heal-anon-dir-off.t was failing occasionally because +afr_gfid_split_brain_source() returned -1 instead of -EIO for +split-brains, causing the code to proceed to afr_lookup_done(), which +in turn succeeded the lookup if there was a parallel client side heal +going on. + +Fix: +Return -EIO instead of -1 so that lookup fails. + +Also, afr_selfheal_name() was using the same dict to get and set values. This +could be problematic if the caller passed local->xdata_req, since +setting a response in a request dict can lead to bugs. So changed it to use +separate request and response dicts. 
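+
+As a minimal sketch of the resulting calling convention (the helpers are the
+ones this patch touches; the surrounding variable names are only
+illustrative):
+
+    dict_t *req = local->xattr_req;  /* request: carries e.g. "heal-op" */
+    dict_t *rsp = dict_new();        /* response: receives "gfid-heal-msg" */
+    if (!rsp)
+        goto unwind;
+    ret = afr_selfheal_name(this, pargfid, bname, gfid_req, req, rsp);
+    if (ret == -EIO)
+        goto unwind; /* unresolved gfid split-brain must fail the lookup */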
+ +Upstream patch details: +> Fixes: #1739 +> Credits Pranith Karampuri <pranith.karampuri@phonepe.com> +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> +>Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 +Upstream patch: https://github.com/gluster/glusterfs/pull/1819/ + +BUG: 1640148 +Signed-off-by: karthik-us <ksubrahm@redhat.com> +Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 +Reviewed-on: https://code.engineering.redhat.com/gerrit/221209 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + xlators/cluster/afr/src/afr-common.c | 12 ++++++++---- + xlators/cluster/afr/src/afr-self-heal-common.c | 27 +++++++++++++------------- + xlators/cluster/afr/src/afr-self-heal-entry.c | 8 ++++---- + xlators/cluster/afr/src/afr-self-heal-name.c | 23 +++++++++++----------- + xlators/cluster/afr/src/afr-self-heal.h | 5 +++-- + xlators/cluster/afr/src/afr-self-heald.c | 2 +- + 6 files changed, 42 insertions(+), 35 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c +index 6f2da11..416012c 100644 +--- a/xlators/cluster/afr/src/afr-common.c ++++ b/xlators/cluster/afr/src/afr-common.c +@@ -2366,7 +2366,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + /* If we were called from glfsheal and there is still a gfid + * mismatch, succeed the lookup and let glfsheal print the + * response via gfid-heal-msg.*/ +- if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", ++ if (!dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", + &gfid_heal_msg)) + goto cant_interpret; + +@@ -2421,7 +2421,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) + goto error; + } + +- ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); ++ ret = dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + ret = dict_set_str_sizen(local->replies[read_subvol].xdata, + "gfid-heal-msg", gfid_heal_msg); +@@ -2768,9 +2768,12 @@ afr_lookup_selfheal_wrap(void *opaque) + local = frame->local; + this = frame->this; + loc_pargfid(&local->loc, pargfid); ++ if (!local->xattr_rsp) ++ local->xattr_rsp = dict_new(); + + ret = afr_selfheal_name(frame->this, pargfid, local->loc.name, +- &local->cont.lookup.gfid_req, local->xattr_req); ++ &local->cont.lookup.gfid_req, local->xattr_req, ++ local->xattr_rsp); + if (ret == -EIO) + goto unwind; + +@@ -2786,7 +2789,8 @@ afr_lookup_selfheal_wrap(void *opaque) + return 0; + + unwind: +- AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); ++ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, local->xattr_rsp, ++ NULL); + return 0; + } + +diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c +index 0a8a7fd..0954d2c 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-common.c ++++ b/xlators/cluster/afr/src/afr-self-heal-common.c +@@ -245,7 +245,8 @@ int + afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, +- unsigned char *locked_on, int *src, dict_t *xdata) ++ unsigned char *locked_on, int *src, dict_t *req, ++ dict_t *rsp) + { + afr_private_t *priv = NULL; + char g1[64] = { +@@ -266,8 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "All the bricks should be up to resolve the gfid split " + "barin"); +- if (xdata) { +- ret = 
dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SALL_BRICKS_UP_TO_RESOLVE); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, +@@ -277,8 +278,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + goto out; + } + +- if (xdata) { +- ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op); ++ if (req) { ++ ret = dict_get_int32_sizen(req, "heal-op", &heal_op); + if (ret) + goto fav_child; + } else { +@@ -292,8 +293,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SNO_BIGGER_FILE); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SNO_BIGGER_FILE); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -310,8 +311,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SNO_DIFF_IN_MTIME); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SNO_DIFF_IN_MTIME); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -323,7 +324,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: +- ret = dict_get_str_sizen(xdata, "child-name", &src_brick); ++ ret = dict_get_str_sizen(req, "child-name", &src_brick); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Error getting the source " +@@ -335,8 +336,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + if (*src == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + SERROR_GETTING_SRC_BRICK); +- if (xdata) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ if (rsp) { ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + SERROR_GETTING_SRC_BRICK); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -400,7 +401,7 @@ out: + uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, + priv->children[src_idx]->name, src_idx, + uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); +- return -1; ++ return -EIO; + } + return 0; + } +diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c +index 20b07dd..a17dd93 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-entry.c ++++ b/xlators/cluster/afr/src/afr-self-heal-entry.c +@@ -399,7 +399,7 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, + (ia_type == replies[i].poststat.ia_type)) { + ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, + bname, src_idx, i, locked_on, src, +- NULL); ++ NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Skipping conservative merge on the " +@@ -474,7 +474,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, + return ret; + + /* In case of type mismatch / unable to resolve gfid mismatch on the +- * entry, return -1.*/ ++ * entry, return -EIO.*/ + ret = afr_selfheal_detect_gfid_and_type_mismatch( + this, replies, inode, fd->inode->gfid, name, source, locked_on, &src); + +@@ -905,7 +905,7 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, + break; + } + +- if (ret == -1) { ++ if (ret == -EIO) { + /* gfid or type mismatch. 
*/ + mismatch = _gf_true; + ret = 0; +@@ -1072,7 +1072,7 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, + else + ret = afr_selfheal_entry_do_subvol(frame, this, fd, i); + +- if (ret == -1) { ++ if (ret == -EIO) { + /* gfid or type mismatch. */ + mismatch = _gf_true; + ret = 0; +diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c +index 51e3d8c..9ec2066 100644 +--- a/xlators/cluster/afr/src/afr-self-heal-name.c ++++ b/xlators/cluster/afr/src/afr-self-heal-name.c +@@ -217,7 +217,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + int source, unsigned char *sources, + int *gfid_idx, uuid_t pargfid, + const char *bname, inode_t *inode, +- unsigned char *locked_on, dict_t *xdata) ++ unsigned char *locked_on, dict_t *req, ++ dict_t *rsp) + { + int i = 0; + int gfid_idx_iter = -1; +@@ -245,11 +246,11 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, + if (sources[i] || source == -1) { + if ((sources[gfid_idx_iter] || source == -1) && + gf_uuid_compare(gfid, gfid1)) { +- ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, +- bname, gfid_idx_iter, i, +- locked_on, gfid_idx, xdata); ++ ret = afr_gfid_split_brain_source( ++ this, replies, inode, pargfid, bname, gfid_idx_iter, i, ++ locked_on, gfid_idx, req, rsp); + if (!ret && *gfid_idx >= 0) { +- ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", ++ ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", + "GFID split-brain resolved"); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, +@@ -303,7 +304,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + unsigned char *sources, unsigned char *sinks, + unsigned char *healed_sinks, int source, + unsigned char *locked_on, struct afr_reply *replies, +- void *gfid_req, dict_t *xdata) ++ void *gfid_req, dict_t *req, dict_t *rsp) + { + int gfid_idx = -1; + int ret = -1; +@@ -333,7 +334,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + + ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources, + &gfid_idx, pargfid, bname, +- inode, locked_on, xdata); ++ inode, locked_on, req, rsp); + if (ret) + return ret; + +@@ -450,7 +451,7 @@ out: + int + afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + uuid_t pargfid, const char *bname, void *gfid_req, +- dict_t *xdata) ++ dict_t *req, dict_t *rsp) + { + afr_private_t *priv = NULL; + unsigned char *sources = NULL; +@@ -505,7 +506,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, + + ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode, + sources, sinks, healed_sinks, source, +- locked_on, replies, gfid_req, xdata); ++ locked_on, replies, gfid_req, req, rsp); + } + unlock: + afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on, +@@ -578,7 +579,7 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, + + int + afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, +- void *gfid_req, dict_t *xdata) ++ void *gfid_req, dict_t *req, dict_t *rsp) + { + inode_t *parent = NULL; + call_frame_t *frame = NULL; +@@ -600,7 +601,7 @@ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, + + if (need_heal) { + ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname, +- gfid_req, xdata); ++ gfid_req, req, rsp); + if (ret) + goto out; + } +diff --git 
a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h +index c8dc384..6b0bf69 100644 +--- a/xlators/cluster/afr/src/afr-self-heal.h ++++ b/xlators/cluster/afr/src/afr-self-heal.h +@@ -127,7 +127,7 @@ afr_throttled_selfheal(call_frame_t *frame, xlator_t *this); + + int + afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req, +- dict_t *xdata); ++ dict_t *req, dict_t *rsp); + + int + afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd); +@@ -357,7 +357,8 @@ int + afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, +- unsigned char *locked_on, int *src, dict_t *xdata); ++ unsigned char *locked_on, int *src, dict_t *req, ++ dict_t *rsp); + int + afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources, + unsigned char *sinks, +diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c +index 939a135..18aed93 100644 +--- a/xlators/cluster/afr/src/afr-self-heald.c ++++ b/xlators/cluster/afr/src/afr-self-heald.c +@@ -295,7 +295,7 @@ afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent, + { + int ret = -1; + +- ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL); ++ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL, NULL); + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch b/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch new file mode 100644 index 0000000..bc1b263 --- /dev/null +++ b/SOURCES/0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch @@ -0,0 +1,388 @@ +From da75c2857fd8b173d47fb7fc3b925ffd14105f64 Mon Sep 17 00:00:00 2001 +From: "Kaleb S. KEITHLEY" <kkeithle@rhel7x.kkeithle.usersys.redhat.com> +Date: Wed, 23 Dec 2020 07:39:13 -0500 +Subject: [PATCH 517/517] gfapi: 'glfs_h_creat_open' - new API to create handle + and open fd + +Right now we have two separate APIs, one +- 'glfs_h_creat_handle' to create handle & another +- 'glfs_h_open' to create a glfd to return to application + +Having two separate routines can result in access errors +while trying to create and write into a read-only file. + +Since a fd is opened even during file/directory creation, +introducing a new API to make these two operations atomic i.e, +which can create both handle & fd and pass them to application + +This is backport of below mainline patch - +- https://review.gluster.org/#/c/glusterfs/+/23448/ +- bz#1753569 + +> Signed-off-by: Soumya Koduri <skoduri@redhat.com> +> Change-Id: Ibf513fcfcdad175f4d7eb6fa7a61b8feec6d33b5 +> release-6: commit 5a2af2fd06356f6fc79d591c352caffd4c511c9e +> master: commit 41a0f2aa755ec7162facd30209f2fa3f40308766 + +BUG: 1910119 +Change-Id: Ib397dbe82a6928d8f24251809d30febddd007bfc +Signed-off-by: Kaleb S. 
KEITHLEY <kkeithle@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/222083 +Reviewed-by: Soumya Koduri <skoduri@redhat.com> +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + api/src/gfapi.aliases | 1 + + api/src/gfapi.map | 5 ++ + api/src/glfs-handleops.c | 135 ++++++++++++++++++++++++++++++++++ + api/src/glfs-handles.h | 5 ++ + tests/basic/gfapi/glfs_h_creat_open.c | 118 +++++++++++++++++++++++++++++ + tests/basic/gfapi/glfs_h_creat_open.t | 27 +++++++ + 6 files changed, 291 insertions(+) + create mode 100644 tests/basic/gfapi/glfs_h_creat_open.c + create mode 100755 tests/basic/gfapi/glfs_h_creat_open.t + +diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases +index 692ae13..3d3415c 100644 +--- a/api/src/gfapi.aliases ++++ b/api/src/gfapi.aliases +@@ -197,3 +197,4 @@ _pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0 + _pub_glfs_setattr _glfs_setattr$GFAPI_6.0 + + _pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_6.4 ++_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6 +diff --git a/api/src/gfapi.map b/api/src/gfapi.map +index df65837..614f3f6 100644 +--- a/api/src/gfapi.map ++++ b/api/src/gfapi.map +@@ -276,3 +276,8 @@ GFAPI_6.4 { + global: + glfs_set_statedump_path; + } GFAPI_PRIVATE_6.1; ++ ++GFAPI_6.6 { ++ global: ++ glfs_h_creat_open; ++} GFAPI_6.4; +diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c +index d4e1545..7b8ff14 100644 +--- a/api/src/glfs-handleops.c ++++ b/api/src/glfs-handleops.c +@@ -843,6 +843,141 @@ invalid_fs: + GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2); + + struct glfs_object * ++pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, ++ const char *path, int flags, mode_t mode, ++ struct stat *stat, struct glfs_fd **out_fd) ++{ ++ int ret = -1; ++ struct glfs_fd *glfd = NULL; ++ xlator_t *subvol = NULL; ++ inode_t *inode = NULL; ++ loc_t loc = { ++ 0, ++ }; ++ struct iatt iatt = { ++ 0, ++ }; ++ uuid_t gfid; ++ dict_t *xattr_req = NULL; ++ struct glfs_object *object = NULL; ++ dict_t *fop_attr = NULL; ++ ++ /* validate in args */ ++ if ((fs == NULL) || (parent == NULL) || (path == NULL) || ++ (out_fd == NULL)) { ++ errno = EINVAL; ++ return NULL; ++ } ++ ++ DECLARE_OLD_THIS; ++ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs); ++ ++ /* get the active volume */ ++ subvol = glfs_active_subvol(fs); ++ if (!subvol) { ++ ret = -1; ++ goto out; ++ } ++ ++ /* get/refresh the in arg objects inode in correlation to the xlator */ ++ inode = glfs_resolve_inode(fs, subvol, parent); ++ if (!inode) { ++ ret = -1; ++ goto out; ++ } ++ ++ xattr_req = dict_new(); ++ if (!xattr_req) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ gf_uuid_generate(gfid); ++ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true); ++ if (ret) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path); ++ ++ glfd = glfs_fd_new(fs); ++ if (!glfd) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ ++ glfd->fd = fd_create(loc.inode, getpid()); ++ if (!glfd->fd) { ++ ret = -1; ++ errno = ENOMEM; ++ goto out; ++ } ++ glfd->fd->flags = flags; ++ ++ ret = get_fop_attr_thrd_key(&fop_attr); ++ if (ret) ++ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed"); ++ ++ /* fop/op */ ++ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req, ++ NULL); ++ DECODE_SYNCOP_ERR(ret); ++ ++ /* populate out args */ ++ if (ret == 0) { ++ glfd->fd->flags = flags; ++ ++ ret = glfs_loc_link(&loc, &iatt); 
++ if (ret != 0) { ++ goto out; ++ } ++ ++ if (stat) ++ glfs_iatt_to_stat(fs, &iatt, stat); ++ ++ ret = glfs_create_object(&loc, &object); ++ } ++ ++out: ++ if (ret && object != NULL) { ++ /* Release the held reference */ ++ glfs_h_close(object); ++ object = NULL; ++ } ++ ++ loc_wipe(&loc); ++ ++ if (inode) ++ inode_unref(inode); ++ ++ if (fop_attr) ++ dict_unref(fop_attr); ++ ++ if (xattr_req) ++ dict_unref(xattr_req); ++ ++ if (ret && glfd) { ++ GF_REF_PUT(glfd); ++ } else if (glfd) { ++ glfd_set_state_bind(glfd); ++ *out_fd = glfd; ++ } ++ ++ glfs_subvol_done(fs, subvol); ++ ++ __GLFS_EXIT_FS; ++ ++invalid_fs: ++ return object; ++} ++ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6); ++ ++struct glfs_object * + pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, struct stat *stat) + { +diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h +index f7e6a06..4d039b9 100644 +--- a/api/src/glfs-handles.h ++++ b/api/src/glfs-handles.h +@@ -250,6 +250,11 @@ int + glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW + GFAPI_PUBLIC(glfs_h_access, 3.6.0); + ++struct glfs_object * ++glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path, ++ int flags, mode_t mode, struct stat *stat, ++ struct glfs_fd **out_fd) __THROW ++ GFAPI_PUBLIC(glfs_h_creat_open, 6.6); + /* + SYNOPSIS + +diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c +new file mode 100644 +index 0000000..7672561 +--- /dev/null ++++ b/tests/basic/gfapi/glfs_h_creat_open.c +@@ -0,0 +1,118 @@ ++#include <fcntl.h> ++#include <unistd.h> ++#include <time.h> ++#include <limits.h> ++#include <string.h> ++#include <stdio.h> ++#include <errno.h> ++#include <stdlib.h> ++#include <glusterfs/api/glfs.h> ++#include <glusterfs/api/glfs-handles.h> ++ ++#define LOG_ERR(func, ret) \ ++ do { \ ++ if (ret != 0) { \ ++ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \ ++ ret, errno); \ ++ exit(1); \ ++ } else { \ ++ fprintf(stderr, "%s : returned %d\n", func, ret); \ ++ } \ ++ } while (0) ++#define LOG_IF_NO_ERR(func, ret) \ ++ do { \ ++ if (ret == 0) { \ ++ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \ ++ exit(1); \ ++ } else { \ ++ fprintf(stderr, "%s : returned %d\n", func, ret); \ ++ } \ ++ } while (0) ++int ++main(int argc, char *argv[]) ++{ ++ glfs_t *fs = NULL; ++ int ret = 0; ++ struct glfs_object *root = NULL, *leaf = NULL; ++ glfs_fd_t *fd = NULL; ++ char *filename = "/ro-file"; ++ struct stat sb = { ++ 0, ++ }; ++ char *logfile = NULL; ++ char *volname = NULL; ++ char *hostname = NULL; ++ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345"; ++ ++ fprintf(stderr, "Starting glfs_h_creat_open\n"); ++ ++ if (argc != 4) { ++ fprintf(stderr, "Invalid argument\n"); ++ exit(1); ++ } ++ ++ hostname = argv[1]; ++ volname = argv[2]; ++ logfile = argv[3]; ++ ++ fs = glfs_new(volname); ++ if (!fs) { ++ fprintf(stderr, "glfs_new: returned NULL\n"); ++ return 1; ++ } ++ ++ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007); ++ LOG_ERR("glfs_set_volfile_server", ret); ++ ++ ret = glfs_set_logging(fs, logfile, 7); ++ LOG_ERR("glfs_set_logging", ret); ++ ++ ret = glfs_init(fs); ++ LOG_ERR("glfs_init", ret); ++ ++ sleep(2); ++ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0); ++ if (!root) { ++ ret = -1; ++ LOG_ERR("glfs_h_lookupat root", ret); ++ } ++ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0); ++ if (!leaf) { ++ ret = -1; ++ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret); ++ } ++ ++ leaf = 
glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd); ++ if (!leaf || !fd) { ++ ret = -1; ++ LOG_ERR("glfs_h_creat leaf", ret); ++ } ++ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf); ++ ++ ret = glfs_write(fd, buf, 32, 0); ++ if (ret < 0) { ++ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename, ++ strerror(errno)); ++ goto out; ++ } ++ ++ ret = glfs_h_getattrs(fs, leaf, &sb); ++ LOG_ERR("glfs_h_getattrs", ret); ++ ++ if (sb.st_size != 32) { ++ fprintf(stderr, "glfs_write: post size mismatch\n"); ++ goto out; ++ } ++ ++ fprintf(stderr, "Successfully opened and written to a read-only file \n"); ++out: ++ if (fd) ++ glfs_close(fd); ++ ++ ret = glfs_fini(fs); ++ LOG_ERR("glfs_fini", ret); ++ ++ fprintf(stderr, "End of libgfapi_fini\n"); ++ ++ exit(0); ++} +diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t +new file mode 100755 +index 0000000..f24ae73 +--- /dev/null ++++ b/tests/basic/gfapi/glfs_h_creat_open.t +@@ -0,0 +1,27 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../../include.rc ++. $(dirname $0)/../../volume.rc ++ ++cleanup; ++ ++TEST glusterd ++ ++TEST $CLI volume create $V0 $H0:$B0/brick1; ++EXPECT 'Created' volinfo_field $V0 'Status'; ++ ++TEST $CLI volume start $V0; ++EXPECT 'Started' volinfo_field $V0 'Status'; ++ ++logdir=`gluster --print-logdir` ++ ++TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi ++ ++TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log ++ ++cleanup_tester $(dirname $0)/glfs_h_creat_open ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume delete $V0 ++ ++cleanup; +-- +1.8.3.1 + diff --git a/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch b/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch new file mode 100644 index 0000000..00d29b9 --- /dev/null +++ b/SOURCES/0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch @@ -0,0 +1,41 @@ +From 818025e467ea98b32a855c92ba6aef6e172e029f Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha <nladha@redhat.com> +Date: Fri, 8 Jan 2021 13:12:46 +0530 +Subject: [PATCH 518/526] glusterd: Fix for shared storage in ipv6 env + +Issue: +Mounting shared storage volume was failing in ipv6 env if the hostnames were FQDNs. +The brickname for the volume was being cut off, as a result, volume creation was failing. 
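+
+The failure mode is a field-splitting one: cutting the "Hostname: <value>"
+line on ':' keeps only the text up to the next colon, which truncates any
+value that itself contains colons, such as an IPv6 address literal. A
+minimal C sketch of the two strategies (illustration only; the hook script
+itself is shell, and the sample value below is made up):
+
+    #include <stdio.h>
+    #include <string.h>
+
+    int main(void)
+    {
+        const char *line = "Hostname: fe80::5054:ff:fe12:3456";
+        char buf[64];
+
+        /* Old behaviour: take the second ':'-separated field. */
+        strncpy(buf, line, sizeof(buf) - 1);
+        buf[sizeof(buf) - 1] = '\0';
+        strtok(buf, ":");                         /* "Hostname" */
+        printf("old: '%s'\n", strtok(NULL, ":")); /* " fe80" - cut off */
+
+        /* Fixed behaviour: split on the first space instead, so the
+         * value survives even when it contains ':' characters. */
+        const char *value = strchr(line, ' ');
+        if (value != NULL)
+            printf("new: '%s'\n", value + 1);     /* whole address kept */
+
+        return 0;
+    }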
+ +>Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e +>Fixes: #1406 +>Signed-off-by: nik-redhat <nladha@redhat.com> +Upstream patch: https://github.com/gluster/glusterfs/pull/1972 + +BUG: 1856574 + +Change-Id: Ib38993724c709b35b603f9ac666630c50c932c3e +Signed-off-by: nik-redhat <nladha@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/223248 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +index 9597503..e9261af 100755 +--- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh ++++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +@@ -46,7 +46,7 @@ do + + key=`echo $line | cut -d ':' -f 1` + if [ "$key" == "Hostname" ]; then +- hostname=`echo $line | cut -d ':' -f 2 | xargs` ++ hostname=`echo $line | cut -d ' ' -f 2 | xargs` + fi + + if [ "$key" == "State" ]; then +-- +1.8.3.1 + diff --git a/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch b/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch new file mode 100644 index 0000000..f37acfd --- /dev/null +++ b/SOURCES/0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch @@ -0,0 +1,58 @@ +From 6ed227367b6eb7d6d7afde3859ad0a711a3adf36 Mon Sep 17 00:00:00 2001 +From: Leela Venkaiah G <gleelavenkaiah@gmail.com> +Date: Wed, 13 Jan 2021 16:02:25 +0530 +Subject: [PATCH 519/526] glusterfs-events: Fix incorrect attribute access + (#2002) + +Issue: When GlusterCmdException is raised, current code try to access +message atrribute which doesn't exist and resulting in a malformed +error string on failure operations + +Code Change: Replace `message` with `args[0]` + +>Fixes: #2001 +>Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb +>Signed-off-by: Leela Venkaiah G <lgangava@redhat.com> +Upstream patch: https://github.com/gluster/glusterfs/pull/2002 + +BUG: 1600459 +Change-Id: I65c9f0ee79310937a384025b8d454acda154e4bb +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/223584 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + events/src/peer_eventsapi.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py +index 26b77a0..c388da4 100644 +--- a/events/src/peer_eventsapi.py ++++ b/events/src/peer_eventsapi.py +@@ -174,9 +174,9 @@ def sync_to_peers(args): + sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC) + except GlusterCmdException as e: + # Print stdout if stderr is empty +- errmsg = e.message[2] if e.message[2] else e.message[1] ++ errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1] + handle_output_error("Failed to sync Webhooks file: [Error: {0}]" +- "{1}".format(e.message[0], errmsg), ++ "{1}".format(e.args[0][0], errmsg), + errcode=ERROR_WEBHOOK_SYNC_FAILED, + json_output=args.json) + +@@ -185,9 +185,9 @@ def sync_to_peers(args): + sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC) + except GlusterCmdException as e: + # Print stdout if stderr is empty +- errmsg = e.message[2] if e.message[2] else e.message[1] ++ errmsg = e.args[0][2] if e.args[0][2] else e.args[0][1] + handle_output_error("Failed to 
sync Config file: [Error: {0}]" +- "{1}".format(e.message[0], errmsg), ++ "{1}".format(e.args[0][0], errmsg), + errcode=ERROR_CONFIG_SYNC_FAILED, + json_output=args.json) + +-- +1.8.3.1 + diff --git a/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch b/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch new file mode 100644 index 0000000..c46a9ca --- /dev/null +++ b/SOURCES/0520-performance-open-behind-seek-fop-should-open_and_res.patch @@ -0,0 +1,70 @@ +From a3fd2c9d85bbd23131c985599d9c9d74f66f32d2 Mon Sep 17 00:00:00 2001 +From: Pranith Kumar K <pkarampu@redhat.com> +Date: Thu, 10 Oct 2019 10:50:59 +0530 +Subject: [PATCH 520/526] performance/open-behind: seek fop should + open_and_resume + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/23530 +> fixes: bz#1760187 +> Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9 +> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> + +BUG: 1830713 +Change-Id: I4c6ad13194d4fc5c7705e35bf9a27fce504b51f9 +Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/224484 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/open-behind/src/open-behind.c | 27 +++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 268c717..3ee3c40 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -711,6 +711,32 @@ err: + } + + int ++ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ++ gf_seek_what_t what, dict_t *xdata) ++{ ++ call_stub_t *stub = NULL; ++ fd_t *wind_fd = NULL; ++ ++ wind_fd = ob_get_wind_fd(this, fd, NULL); ++ ++ stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, ++ xdata); ++ ++ fd_unref(wind_fd); ++ ++ if (!stub) ++ goto err; ++ ++ open_and_resume(this, wind_fd, stub); ++ ++ return 0; ++err: ++ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ ++ return 0; ++} ++ ++int + ob_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { + call_stub_t *stub = NULL; +@@ -1276,6 +1302,7 @@ struct xlator_fops fops = { + .flush = ob_flush, + .fsync = ob_fsync, + .fstat = ob_fstat, ++ .seek = ob_seek, + .ftruncate = ob_ftruncate, + .fsetxattr = ob_fsetxattr, + .setxattr = ob_setxattr, +-- +1.8.3.1 + diff --git a/SOURCES/0521-open-behind-fix-missing-fd-reference.patch b/SOURCES/0521-open-behind-fix-missing-fd-reference.patch new file mode 100644 index 0000000..8e18af8 --- /dev/null +++ b/SOURCES/0521-open-behind-fix-missing-fd-reference.patch @@ -0,0 +1,121 @@ +From 211d0f7dbb4991b2191925973222ebc79f010e84 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Sun, 8 Mar 2020 18:36:45 +0100 +Subject: [PATCH 521/526] open-behind: fix missing fd reference + +Open behind was not keeping any reference on fd's pending to be +opened. This makes it possible that a concurrent close and en entry +fop (unlink, rename, ...) caused destruction of the fd while it +was still being used. 
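+
+The race reduces to plain reference counting. A self-contained sketch of
+why the pending open has to pin the fd (the toy handle_t below is an
+illustration, not the real fd_t API; all names are made up):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    typedef struct {
+        int refs;
+        int in_use;
+    } handle_t;
+
+    static handle_t *handle_ref(handle_t *h) { h->refs++; return h; }
+
+    static void handle_unref(handle_t *h)
+    {
+        if (--h->refs == 0) {
+            printf("destroyed\n");
+            free(h);
+        }
+    }
+
+    int main(void)
+    {
+        handle_t *h = calloc(1, sizeof(*h));
+        h->refs = 1;                 /* application's reference */
+
+        /* The fix: the pending background open takes its own ref. */
+        handle_t *pending = handle_ref(h);
+
+        handle_unref(h);             /* concurrent close/unlink path */
+        pending->in_use = 1;         /* still safe: we pinned it */
+        handle_unref(pending);       /* background open completed */
+        return 0;
+    }
+
+Without the handle_ref() call, the unref from the close path would free the
+object while the background open still held a raw pointer to it.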
+ +Upstream patch: +> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24204 +> Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +> Fixes: bz#1810934 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +Change-Id: Ie9e992902cf2cd7be4af1f8b4e57af9bd6afd8e9 +BUG: 1830713 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/224485 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/open-behind/src/open-behind.c | 27 ++++++++++++++--------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 3ee3c40..dd2f2fd 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -206,8 +206,13 @@ ob_fd_free(ob_fd_t *ob_fd) + if (ob_fd->xdata) + dict_unref(ob_fd->xdata); + +- if (ob_fd->open_frame) ++ if (ob_fd->open_frame) { ++ /* If we sill have a frame it means that background open has never ++ * been triggered. We need to release the pending reference. */ ++ fd_unref(ob_fd->fd); ++ + STACK_DESTROY(ob_fd->open_frame->root); ++ } + + GF_FREE(ob_fd); + } +@@ -297,6 +302,7 @@ ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + call_resume(stub); + } + ++ /* The background open is completed. We can release the 'fd' reference. */ + fd_unref(fd); + + STACK_DESTROY(frame->root); +@@ -331,7 +337,9 @@ ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) + } + + if (frame) { +- frame->local = fd_ref(fd); ++ /* We don't need to take a reference here. We already have a reference ++ * while the open is pending. */ ++ frame->local = fd; + + STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +@@ -345,15 +353,12 @@ void + ob_inode_wake(xlator_t *this, struct list_head *ob_fds) + { + ob_fd_t *ob_fd = NULL, *tmp = NULL; +- fd_t *fd = NULL; + + if (!list_empty(ob_fds)) { + list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) + { + ob_fd_wake(this, ob_fd->fd, ob_fd); +- fd = ob_fd->fd; + ob_fd_free(ob_fd); +- fd_unref(fd); + } + } + } +@@ -365,7 +370,7 @@ ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) + if (!src || !dst) + goto out; + +- dst->fd = __fd_ref(src->fd); ++ dst->fd = src->fd; + dst->loc.inode = inode_ref(src->loc.inode); + gf_uuid_copy(dst->loc.gfid, src->loc.gfid); + dst->flags = src->flags; +@@ -509,7 +514,6 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + + ob_fd->ob_inode = ob_inode; + +- /* don't do fd_ref, it'll cause leaks */ + ob_fd->fd = fd; + + ob_fd->open_frame = copy_frame(frame); +@@ -539,15 +543,16 @@ ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + } + UNLOCK(&fd->inode->lock); + +- if (!open_in_progress && !unlinked) { +- fd_ref(fd); ++ /* We take a reference while the background open is pending or being ++ * processed. If we finally wind the request in the foreground, then ++ * ob_fd_free() will take care of this additional reference. 
*/ ++ fd_ref(fd); + ++ if (!open_in_progress && !unlinked) { + STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake(this, fd, NULL); +- +- fd_unref(fd); + } else { + ob_fd_free(ob_fd); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +-- +1.8.3.1 + diff --git a/SOURCES/0522-lcov-improve-line-coverage.patch b/SOURCES/0522-lcov-improve-line-coverage.patch new file mode 100644 index 0000000..13ece12 --- /dev/null +++ b/SOURCES/0522-lcov-improve-line-coverage.patch @@ -0,0 +1,746 @@ +From 46e2bbd52d4427c1348fa38dcb5d2b5f125555f1 Mon Sep 17 00:00:00 2001 +From: Amar Tumballi <amarts@redhat.com> +Date: Thu, 30 May 2019 15:25:01 +0530 +Subject: [PATCH 522/526] lcov: improve line coverage + +upcall: remove extra variable assignment and use just one + initialization. +open-behind: reduce the overall number of lines, in functions + not frequently called +selinux: reduce some lines in init failure cases + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22789 +> updates: bz#1693692 +> Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95 +> Signed-off-by: Amar Tumballi <amarts@redhat.com> + +BUG: 1830713 +Change-Id: I7c1de94f2ec76a5bfe1f48a9632879b18e5fbb95 +Signed-off-by: Amar Tumballi <amarts@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/224486 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/features/selinux/src/selinux.c | 6 +- + xlators/features/upcall/src/upcall.c | 108 +++++++--------------- + xlators/performance/open-behind/src/open-behind.c | 58 ++++-------- + 3 files changed, 55 insertions(+), 117 deletions(-) + +diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c +index 58b4c5d..e8e16cd 100644 +--- a/xlators/features/selinux/src/selinux.c ++++ b/xlators/features/selinux/src/selinux.c +@@ -234,7 +234,6 @@ init(xlator_t *this) + priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "out of memory"); +- ret = ENOMEM; + goto out; + } + +@@ -242,7 +241,6 @@ init(xlator_t *this) + + this->local_pool = mem_pool_new(selinux_priv_t, 64); + if (!this->local_pool) { +- ret = -1; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM, + "Failed to create local_t's memory pool"); + goto out; +@@ -252,9 +250,7 @@ init(xlator_t *this) + ret = 0; + out: + if (ret) { +- if (priv) { +- GF_FREE(priv); +- } ++ GF_FREE(priv); + mem_pool_destroy(this->local_pool); + } + return ret; +diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c +index 2583c50..0795f58 100644 +--- a/xlators/features/upcall/src/upcall.c ++++ b/xlators/features/upcall/src/upcall.c +@@ -57,14 +57,13 @@ static int32_t + up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -111,14 +110,13 @@ up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t off, uint32_t flags, struct iobref *iobref, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, 
NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -167,14 +165,13 @@ static int32_t + up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -220,14 +217,13 @@ static int32_t + up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -274,14 +270,13 @@ static int32_t + up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -343,14 +338,13 @@ static int32_t + up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -410,14 +404,13 @@ static int32_t + up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -472,14 +465,13 @@ static int32_t + up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -531,14 +523,13 @@ static int32_t + up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -592,14 +583,13 @@ static int32_t + up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -653,14 +643,13 @@ static int32_t + up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *params) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, 
this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -717,15 +706,13 @@ static int32_t + up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *params) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); +- + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -774,14 +761,13 @@ out: + static int32_t + up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -826,14 +812,13 @@ out: + static int32_t + up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -852,14 +837,13 @@ err: + static int32_t + up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -879,14 +863,13 @@ static int32_t + up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -932,14 +915,13 @@ static int32_t + up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -986,14 +968,13 @@ static int32_t + up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1047,14 +1028,13 @@ static int32_t + up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1110,14 +1090,13 @@ static int32_t + up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath, + loc_t *loc, mode_t umask, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, 
loc, NULL, loc->parent, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1164,14 +1143,13 @@ static int32_t + up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1216,14 +1194,13 @@ out: + static int32_t + up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1270,14 +1247,13 @@ static int32_t + up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1334,14 +1310,13 @@ static int32_t + up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1361,14 +1336,13 @@ static int32_t + up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1415,14 +1389,13 @@ static int32_t + up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1470,14 +1443,13 @@ static int32_t + up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1524,14 +1496,13 @@ static int + up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1577,14 +1548,13 @@ static int32_t + up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = 
upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1652,14 +1622,13 @@ static int32_t + up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1727,14 +1696,13 @@ static int32_t + up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1800,7 +1768,7 @@ static int32_t + up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + +@@ -1808,13 +1776,11 @@ up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { +- op_errno = ENOMEM; + goto err; + } + + local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1885,7 +1851,7 @@ static int32_t + up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + dict_t *xattr = NULL; + +@@ -1893,13 +1859,11 @@ up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + + xattr = dict_for_key_value(name, "", 1, _gf_true); + if (!xattr) { +- op_errno = ENOMEM; + goto err; + } + + local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -1950,14 +1914,13 @@ static int32_t + up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +@@ -2000,14 +1963,13 @@ static int32_t + up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, + dict_t *xdata) + { +- int32_t op_errno = -1; ++ int32_t op_errno = ENOMEM; + upcall_local_t *local = NULL; + + EXIT_IF_UPCALL_OFF(this, out); + + local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL); + if (!local) { +- op_errno = ENOMEM; + goto err; + } + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index dd2f2fd..cbe89ec 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -581,7 +581,7 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + { + fd_t *old_fd = NULL; + int ret = -1; +- int op_errno = 0; ++ int op_errno = ENOMEM; + call_stub_t *stub = NULL; + + old_fd = fd_lookup(fd->inode, 0); +@@ -589,7 +589,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + /* open-behind only when this is the 
first FD */ + stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); + if (!stub) { +- op_errno = ENOMEM; + fd_unref(old_fd); + goto err; + } +@@ -603,7 +602,6 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + + ret = ob_open_behind(frame, this, loc, flags, fd, xdata); + if (ret) { +- op_errno = ENOMEM; + goto err; + } + +@@ -900,18 +898,12 @@ int + ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd, +- flock, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); ++ call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, ++ volume, fd, cmd, flock, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); + + return 0; + } +@@ -921,18 +913,12 @@ ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd, +- basename, cmd, type, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); ++ call_stub_t *stub = fop_fentrylk_stub( ++ frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); + + return 0; + } +@@ -941,18 +927,12 @@ int + ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, optype, xattr, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); ++ call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, ++ optype, xattr, xdata); ++ if (stub) ++ open_and_resume(this, fd, stub); ++ else ++ STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch b/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch new file mode 100644 index 0000000..621d5ae --- /dev/null +++ b/SOURCES/0523-open-behind-rewrite-of-internal-logic.patch @@ -0,0 +1,2720 @@ +From b924c8ca8a133fc9413c8ed1407e63f1658c7e79 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Tue, 12 May 2020 23:54:54 +0200 +Subject: [PATCH 523/526] open-behind: rewrite of internal logic + +There was a critical flaw in the previous implementation of open-behind. + +When an open is done in the background, it's necessary to take a +reference on the fd_t object because once we "fake" the open answer, +the fd could be destroyed. However as long as there's a reference, +the release function won't be called. So, if the application closes +the file descriptor without having actually opened it, there will +always remain at least 1 reference, causing a leak. 
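+
+The leak itself can be reproduced with a toy counter (the fdstub_t below is
+an illustration, not the real fd_t):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    typedef struct {
+        int refs;
+    } fdstub_t;
+
+    static void fdstub_unref(fdstub_t *f)
+    {
+        if (--f->refs == 0) {        /* release path runs only here */
+            printf("released\n");
+            free(f);
+            return;
+        }
+        printf("leaked: %d reference(s) still held\n", f->refs);
+    }
+
+    int main(void)
+    {
+        fdstub_t *f = calloc(1, sizeof(*f));
+        f->refs = 2;        /* application + pending background open */
+
+        /* The application closes a descriptor it never actually used,
+         * so the background open never runs and never drops its ref. */
+        fdstub_unref(f);
+        return 0;           /* refs stuck at 1: 'f' is never freed */
+    }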
+ +To avoid this problem, the previous implementation didn't take a +reference on the fd_t, so there were races where the fd could be +destroyed while it was still in use. + +To fix this, I've implemented a new xlator cbk that gets called from +fuse when the application closes a file descriptor. + +The whole logic of handling background opens have been simplified and +it's more efficient now. Only if the fop needs to be delayed until an +open completes, a stub is created. Otherwise no memory allocations are +needed. + +Correctly handling the close request while the open is still pending +has added a bit of complexity, but overall normal operation is simpler. + +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24451 +> Change-Id: I6376a5491368e0e1c283cc452849032636261592 +> Fixes: #1225 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1830713 +Change-Id: I6376a5491368e0e1c283cc452849032636261592 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/224487 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + libglusterfs/src/fd.c | 26 + + libglusterfs/src/glusterfs/fd.h | 3 + + libglusterfs/src/glusterfs/xlator.h | 4 + + libglusterfs/src/libglusterfs.sym | 1 + + tests/basic/open-behind/open-behind.t | 183 +++ + tests/basic/open-behind/tester-fd.c | 99 ++ + tests/basic/open-behind/tester.c | 444 +++++++ + tests/basic/open-behind/tester.h | 145 +++ + tests/bugs/glusterfs/bug-873962-spb.t | 1 + + xlators/mount/fuse/src/fuse-bridge.c | 2 + + .../open-behind/src/open-behind-messages.h | 6 +- + xlators/performance/open-behind/src/open-behind.c | 1302 ++++++++------------ + 12 files changed, 1393 insertions(+), 823 deletions(-) + create mode 100644 tests/basic/open-behind/open-behind.t + create mode 100644 tests/basic/open-behind/tester-fd.c + create mode 100644 tests/basic/open-behind/tester.c + create mode 100644 tests/basic/open-behind/tester.h + +diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c +index 314546a..e4ec401 100644 +--- a/libglusterfs/src/fd.c ++++ b/libglusterfs/src/fd.c +@@ -501,6 +501,32 @@ out: + } + + void ++fd_close(fd_t *fd) ++{ ++ xlator_t *xl, *old_THIS; ++ ++ old_THIS = THIS; ++ ++ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) { ++ if (!xl->call_cleanup) { ++ THIS = xl; ++ ++ if (IA_ISDIR(fd->inode->ia_type)) { ++ if (xl->cbks->fdclosedir != NULL) { ++ xl->cbks->fdclosedir(xl, fd); ++ } ++ } else { ++ if (xl->cbks->fdclose != NULL) { ++ xl->cbks->fdclose(xl, fd); ++ } ++ } ++ } ++ } ++ ++ THIS = old_THIS; ++} ++ ++void + fd_unref(fd_t *fd) + { + int32_t refcount = 0; +diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h +index cdbe289..4d157c4 100644 +--- a/libglusterfs/src/glusterfs/fd.h ++++ b/libglusterfs/src/glusterfs/fd.h +@@ -107,6 +107,9 @@ fd_ref(fd_t *fd); + void + fd_unref(fd_t *fd); + ++void ++fd_close(fd_t *fd); ++ + fd_t * + fd_create(struct _inode *inode, pid_t pid); + +diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h +index 8650ccc..273039a 100644 +--- a/libglusterfs/src/glusterfs/xlator.h ++++ b/libglusterfs/src/glusterfs/xlator.h +@@ -705,6 +705,8 @@ typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); + + typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); + ++typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd); ++ + struct xlator_cbks { + 
cbk_forget_t forget; + cbk_release_t release; +@@ -715,6 +717,8 @@ struct xlator_cbks { + cbk_ictxmerge_t ictxmerge; + cbk_inodectx_size_t ictxsize; + cbk_fdctx_size_t fdctxsize; ++ cbk_fdclose_t fdclose; ++ cbk_fdclose_t fdclosedir; + }; + + typedef int32_t (*dumpop_priv_t)(xlator_t *this); +diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym +index bc770e2..0a0862e 100644 +--- a/libglusterfs/src/libglusterfs.sym ++++ b/libglusterfs/src/libglusterfs.sym +@@ -456,6 +456,7 @@ event_unregister_close + fd_anonymous + fd_anonymous_with_flags + fd_bind ++fd_close + fd_create + fd_create_uint64 + __fd_ctx_del +diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t +new file mode 100644 +index 0000000..5e865d6 +--- /dev/null ++++ b/tests/basic/open-behind/open-behind.t +@@ -0,0 +1,183 @@ ++#!/bin/bash ++ ++WD="$(dirname "${0}")" ++ ++. ${WD}/../../include.rc ++. ${WD}/../../volume.rc ++ ++function assign() { ++ local _assign_var="${1}" ++ local _assign_value="${2}" ++ ++ printf -v "${_assign_var}" "%s" "${_assign_value}" ++} ++ ++function pipe_create() { ++ local _pipe_create_var="${1}" ++ local _pipe_create_name ++ local _pipe_create_fd ++ ++ _pipe_create_name="$(mktemp -u)" ++ mkfifo "${_pipe_create_name}" ++ exec {_pipe_create_fd}<>"${_pipe_create_name}" ++ rm "${_pipe_create_name}" ++ ++ assign "${_pipe_create_var}" "${_pipe_create_fd}" ++} ++ ++function pipe_close() { ++ local _pipe_close_fd="${!1}" ++ ++ exec {_pipe_close_fd}>&- ++} ++ ++function tester_start() { ++ declare -ag tester ++ local tester_in ++ local tester_out ++ ++ pipe_create tester_in ++ pipe_create tester_out ++ ++ ${WD}/tester <&${tester_in} >&${tester_out} & ++ ++ tester=("$!" "${tester_in}" "${tester_out}") ++} ++ ++function tester_send() { ++ declare -ag tester ++ local tester_res ++ local tester_extra ++ ++ echo "${*}" >&${tester[1]} ++ ++ read -t 3 -u ${tester[2]} tester_res tester_extra ++ echo "${tester_res} ${tester_extra}" ++ if [[ "${tester_res}" == "OK" ]]; then ++ return 0 ++ fi ++ ++ return 1 ++} ++ ++function tester_stop() { ++ declare -ag tester ++ local tester_res ++ ++ tester_send "quit" ++ ++ tester_res=0 ++ if ! wait ${tester[0]}; then ++ tester_res=$? 
++ fi ++ ++ unset tester ++ ++ return ${tester_res} ++} ++ ++function count_open() { ++ local file="$(realpath "${B0}/${V0}/${1}")" ++ local count="0" ++ local inode ++ local ref ++ ++ inode="$(stat -c %i "${file}")" ++ ++ for fd in /proc/${BRICK_PID}/fd/*; do ++ ref="$(readlink "${fd}")" ++ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then ++ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then ++ count="$((${count} + 1))" ++ fi ++ fi ++ done ++ ++ echo "${count}" ++} ++ ++cleanup ++ ++TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c ++ ++TEST glusterd ++TEST pidof glusterd ++TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0} ++TEST ${CLI} volume set ${V0} flush-behind off ++TEST ${CLI} volume set ${V0} write-behind off ++TEST ${CLI} volume set ${V0} quick-read off ++TEST ${CLI} volume set ${V0} stat-prefetch on ++TEST ${CLI} volume set ${V0} io-cache off ++TEST ${CLI} volume set ${V0} open-behind on ++TEST ${CLI} volume set ${V0} lazy-open off ++TEST ${CLI} volume set ${V0} read-after-open off ++TEST ${CLI} volume start ${V0} ++ ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})" ++ ++TEST touch "${M0}/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_start ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT_WITHIN 5 "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${CLI} volume set ${V0} lazy-open on ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++sleep 2 ++EXPECT "0" count_open "/test" ++TEST tester_send fd write 0 "test" ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++EXPECT "test" tester_send fd read 0 64 ++# Even though read-after-open is disabled, use-anonymous-fd is also disabled, ++# so reads need to open the file first. ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++TEST tester_send fd open 1 "${M0}/test" ++EXPECT "2" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "1" count_open "/test" ++TEST tester_send fd close 1 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++TEST ${CLI} volume set ${V0} read-after-open on ++TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0}; ++ ++TEST tester_send fd open 0 "${M0}/test" ++EXPECT "0" count_open "/test" ++EXPECT "test" tester_send fd read 0 64 ++EXPECT "1" count_open "/test" ++TEST tester_send fd close 0 ++EXPECT_WITHIN 5 "0" count_open "/test" ++ ++EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 ++ ++TEST tester_stop ++ ++cleanup +diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c +new file mode 100644 +index 0000000..00f02bc +--- /dev/null ++++ b/tests/basic/open-behind/tester-fd.c +@@ -0,0 +1,99 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. 
++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#include "tester.h" ++ ++#include <stdlib.h> ++#include <unistd.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++#include <string.h> ++#include <ctype.h> ++#include <errno.h> ++ ++static int32_t ++fd_open(context_t *ctx, command_t *cmd) ++{ ++ obj_t *obj; ++ int32_t fd; ++ ++ obj = cmd->args[0].obj.ref; ++ ++ fd = open(cmd->args[1].str.data, O_RDWR); ++ if (fd < 0) { ++ return error(errno, "open() failed"); ++ } ++ ++ obj->type = OBJ_TYPE_FD; ++ obj->fd = fd; ++ ++ out_ok("%d", fd); ++ ++ return 0; ++} ++ ++static int32_t ++fd_close(context_t *ctx, command_t *cmd) ++{ ++ obj_t *obj; ++ ++ obj = cmd->args[0].obj.ref; ++ obj->type = OBJ_TYPE_NONE; ++ ++ if (close(obj->fd) != 0) { ++ return error(errno, "close() failed"); ++ } ++ ++ out_ok(); ++ ++ return 0; ++} ++ ++static int32_t ++fd_write(context_t *ctx, command_t *cmd) ++{ ++ ssize_t len, ret; ++ ++ len = strlen(cmd->args[1].str.data); ++ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len); ++ if (ret < 0) { ++ return error(errno, "write() failed"); ++ } ++ ++ out_ok("%zd", ret); ++ ++ return 0; ++} ++ ++static int32_t ++fd_read(context_t *ctx, command_t *cmd) ++{ ++ char data[cmd->args[1].num.value + 1]; ++ ssize_t ret; ++ ++ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value); ++ if (ret < 0) { ++ return error(errno, "read() failed"); ++ } ++ ++ data[ret] = 0; ++ ++ out_ok("%zd %s", ret, data); ++ ++ return 0; ++} ++ ++command_t fd_commands[] = { ++ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))}, ++ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))}, ++ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))}, ++ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))}, ++ CMD_END}; +diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c +new file mode 100644 +index 0000000..b2da71c +--- /dev/null ++++ b/tests/basic/open-behind/tester.c +@@ -0,0 +1,444 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. 
++*/ ++ ++#include "tester.h" ++ ++#include <stdlib.h> ++#include <unistd.h> ++#include <string.h> ++#include <ctype.h> ++#include <errno.h> ++ ++static void * ++mem_alloc(size_t size) ++{ ++ void *ptr; ++ ++ ptr = malloc(size); ++ if (ptr == NULL) { ++ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size); ++ } ++ ++ return ptr; ++} ++ ++static void ++mem_free(void *ptr) ++{ ++ free(ptr); ++} ++ ++static bool ++buffer_create(context_t *ctx, size_t size) ++{ ++ ctx->buffer.base = mem_alloc(size); ++ if (ctx->buffer.base == NULL) { ++ return false; ++ } ++ ++ ctx->buffer.size = size; ++ ctx->buffer.len = 0; ++ ctx->buffer.pos = 0; ++ ++ return true; ++} ++ ++static void ++buffer_destroy(context_t *ctx) ++{ ++ mem_free(ctx->buffer.base); ++ ctx->buffer.size = 0; ++ ctx->buffer.len = 0; ++} ++ ++static int32_t ++buffer_get(context_t *ctx) ++{ ++ ssize_t len; ++ ++ if (ctx->buffer.pos >= ctx->buffer.len) { ++ len = read(0, ctx->buffer.base, ctx->buffer.size); ++ if (len < 0) { ++ return error(errno, "read() failed"); ++ } ++ if (len == 0) { ++ return 0; ++ } ++ ++ ctx->buffer.len = len; ++ ctx->buffer.pos = 0; ++ } ++ ++ return ctx->buffer.base[ctx->buffer.pos++]; ++} ++ ++static int32_t ++str_skip_spaces(context_t *ctx, int32_t current) ++{ ++ while ((current > 0) && (current != '\n') && isspace(current)) { ++ current = buffer_get(ctx); ++ } ++ ++ return current; ++} ++ ++static int32_t ++str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current) ++{ ++ uint32_t len; ++ ++ current = str_skip_spaces(ctx, current); ++ ++ len = 0; ++ while ((size > 0) && (current > 0) && (current != '\n') && ++ !isspace(current)) { ++ len++; ++ *buffer++ = current; ++ size--; ++ current = buffer_get(ctx); ++ } ++ ++ if (len == 0) { ++ return error(ENODATA, "Expecting a token"); ++ } ++ ++ if (size == 0) { ++ return error(ENOBUFS, "Token too long"); ++ } ++ ++ *buffer = 0; ++ ++ return current; ++} ++ ++static int32_t ++str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value, ++ int32_t current) ++{ ++ char text[32], *ptr; ++ uint64_t num; ++ ++ current = str_token(ctx, text, sizeof(text), current); ++ if (current > 0) { ++ num = strtoul(text, &ptr, 0); ++ if ((*ptr != 0) || (num < min) || (num > max)) { ++ return error(ERANGE, "Invalid number"); ++ } ++ *value = num; ++ } ++ ++ return current; ++} ++ ++static int32_t ++str_eol(context_t *ctx, int32_t current) ++{ ++ current = str_skip_spaces(ctx, current); ++ if (current != '\n') { ++ return error(EINVAL, "Expecting end of command"); ++ } ++ ++ return current; ++} ++ ++static void ++str_skip(context_t *ctx, int32_t current) ++{ ++ while ((current > 0) && (current != '\n')) { ++ current = buffer_get(ctx); ++ } ++} ++ ++static int32_t ++cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ obj_t *obj; ++ uint64_t id; ++ ++ current = str_number(ctx, 0, ctx->obj_count, &id, current); ++ if (current <= 0) { ++ return current; ++ } ++ ++ obj = &ctx->objs[id]; ++ if (obj->type != arg->obj.type) { ++ if (obj->type != OBJ_TYPE_NONE) { ++ return error(EBUSY, "Object is in use"); ++ } ++ return error(ENOENT, "Object is not defined"); ++ } ++ ++ arg->obj.ref = obj; ++ ++ return current; ++} ++ ++static int32_t ++cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value, ++ current); ++} ++ ++static int32_t ++cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current) ++{ ++ return str_token(ctx, arg->str.data, arg->str.size, current); ++} ++ ++static 
int32_t ++cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current) ++{ ++ arg_t *arg; ++ ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_OBJ: ++ current = cmd_parse_obj(ctx, arg, current); ++ break; ++ case ARG_TYPE_NUM: ++ current = cmd_parse_num(ctx, arg, current); ++ break; ++ case ARG_TYPE_STR: ++ current = cmd_parse_str(ctx, arg, current); ++ break; ++ default: ++ return error(EINVAL, "Unknown argument type"); ++ } ++ } ++ ++ if (current < 0) { ++ return current; ++ } ++ ++ current = str_eol(ctx, current); ++ if (current <= 0) { ++ return error(EINVAL, "Syntax error"); ++ } ++ ++ return cmd->handler(ctx, cmd); ++} ++ ++static int32_t ++cmd_parse(context_t *ctx, command_t *cmds) ++{ ++ char text[32]; ++ command_t *cmd; ++ int32_t current; ++ ++ cmd = cmds; ++ do { ++ current = str_token(ctx, text, sizeof(text), buffer_get(ctx)); ++ if (current <= 0) { ++ return current; ++ } ++ ++ while (cmd->name != NULL) { ++ if (strcmp(cmd->name, text) == 0) { ++ if (cmd->handler != NULL) { ++ return cmd_parse_args(ctx, cmd, current); ++ } ++ cmd = cmd->cmds; ++ break; ++ } ++ cmd++; ++ } ++ } while (cmd->name != NULL); ++ ++ str_skip(ctx, current); ++ ++ return error(ENOTSUP, "Unknown command"); ++} ++ ++static void ++cmd_fini(context_t *ctx, command_t *cmds) ++{ ++ command_t *cmd; ++ arg_t *arg; ++ ++ for (cmd = cmds; cmd->name != NULL; cmd++) { ++ if (cmd->handler == NULL) { ++ cmd_fini(ctx, cmd->cmds); ++ } else { ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_STR: ++ mem_free(arg->str.data); ++ arg->str.data = NULL; ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ } ++} ++ ++static bool ++cmd_init(context_t *ctx, command_t *cmds) ++{ ++ command_t *cmd; ++ arg_t *arg; ++ ++ for (cmd = cmds; cmd->name != NULL; cmd++) { ++ if (cmd->handler == NULL) { ++ if (!cmd_init(ctx, cmd->cmds)) { ++ return false; ++ } ++ } else { ++ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) { ++ switch (arg->type) { ++ case ARG_TYPE_STR: ++ arg->str.data = mem_alloc(arg->str.size); ++ if (arg->str.data == NULL) { ++ return false; ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ } ++ ++ return true; ++} ++ ++static bool ++objs_create(context_t *ctx, uint32_t count) ++{ ++ uint32_t i; ++ ++ ctx->objs = mem_alloc(sizeof(obj_t) * count); ++ if (ctx->objs == NULL) { ++ return false; ++ } ++ ctx->obj_count = count; ++ ++ for (i = 0; i < count; i++) { ++ ctx->objs[i].type = OBJ_TYPE_NONE; ++ } ++ ++ return true; ++} ++ ++static int32_t ++objs_destroy(context_t *ctx) ++{ ++ uint32_t i; ++ int32_t err; ++ ++ err = 0; ++ for (i = 0; i < ctx->obj_count; i++) { ++ if (ctx->objs[i].type != OBJ_TYPE_NONE) { ++ err = error(ENOTEMPTY, "Objects not destroyed"); ++ break; ++ } ++ } ++ ++ mem_free(ctx->objs); ++ ctx->objs = NULL; ++ ctx->obj_count = 0; ++ ++ return err; ++} ++ ++static context_t * ++init(size_t size, uint32_t objs, command_t *cmds) ++{ ++ context_t *ctx; ++ ++ ctx = mem_alloc(sizeof(context_t)); ++ if (ctx == NULL) { ++ goto failed; ++ } ++ ++ if (!buffer_create(ctx, size)) { ++ goto failed_ctx; ++ } ++ ++ if (!objs_create(ctx, objs)) { ++ goto failed_buffer; ++ } ++ ++ if (!cmd_init(ctx, cmds)) { ++ goto failed_objs; ++ } ++ ++ ctx->active = true; ++ ++ return ctx; ++ ++failed_objs: ++ cmd_fini(ctx, cmds); ++ objs_destroy(ctx); ++failed_buffer: ++ buffer_destroy(ctx); ++failed_ctx: ++ mem_free(ctx); ++failed: ++ return NULL; ++} ++ ++static int32_t ++fini(context_t *ctx, command_t *cmds) 
++{ ++ int32_t ret; ++ ++ cmd_fini(ctx, cmds); ++ buffer_destroy(ctx); ++ ++ ret = objs_destroy(ctx); ++ ++ ctx->active = false; ++ ++ return ret; ++} ++ ++static int32_t ++exec_quit(context_t *ctx, command_t *cmd) ++{ ++ ctx->active = false; ++ ++ return 0; ++} ++ ++static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)}, ++ {"quit", exec_quit, CMD_ARGS()}, ++ CMD_END}; ++ ++int32_t ++main(int32_t argc, char *argv[]) ++{ ++ context_t *ctx; ++ int32_t res; ++ ++ ctx = init(1024, 16, commands); ++ if (ctx == NULL) { ++ return 1; ++ } ++ ++ do { ++ res = cmd_parse(ctx, commands); ++ if (res < 0) { ++ out_err(-res); ++ } ++ } while (ctx->active); ++ ++ res = fini(ctx, commands); ++ if (res >= 0) { ++ out_ok(); ++ return 0; ++ } ++ ++ out_err(-res); ++ ++ return 1; ++} +diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h +new file mode 100644 +index 0000000..64e940c +--- /dev/null ++++ b/tests/basic/open-behind/tester.h +@@ -0,0 +1,145 @@ ++/* ++ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com> ++ This file is part of GlusterFS. ++ ++ This file is licensed to you under your choice of the GNU Lesser ++ General Public License, version 3 or any later version (LGPLv3 or ++ later), or the GNU General Public License, version 2 (GPLv2), in all ++ cases as published by the Free Software Foundation. ++*/ ++ ++#ifndef __TESTER_H__ ++#define __TESTER_H__ ++ ++#include <stdio.h> ++#include <inttypes.h> ++#include <stdbool.h> ++ ++enum _obj_type; ++typedef enum _obj_type obj_type_t; ++ ++enum _arg_type; ++typedef enum _arg_type arg_type_t; ++ ++struct _buffer; ++typedef struct _buffer buffer_t; ++ ++struct _obj; ++typedef struct _obj obj_t; ++ ++struct _context; ++typedef struct _context context_t; ++ ++struct _arg; ++typedef struct _arg arg_t; ++ ++struct _command; ++typedef struct _command command_t; ++ ++enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD }; ++ ++enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR }; ++ ++struct _buffer { ++ char *base; ++ uint32_t size; ++ uint32_t len; ++ uint32_t pos; ++}; ++ ++struct _obj { ++ obj_type_t type; ++ union { ++ int32_t fd; ++ }; ++}; ++ ++struct _context { ++ obj_t *objs; ++ buffer_t buffer; ++ uint32_t obj_count; ++ bool active; ++}; ++ ++struct _arg { ++ arg_type_t type; ++ union { ++ struct { ++ obj_type_t type; ++ obj_t *ref; ++ } obj; ++ struct { ++ uint64_t value; ++ uint64_t min; ++ uint64_t max; ++ } num; ++ struct { ++ uint32_t size; ++ char *data; ++ } str; ++ }; ++}; ++ ++struct _command { ++ const char *name; ++ int32_t (*handler)(context_t *ctx, command_t *cmd); ++ union { ++ arg_t *args; ++ command_t *cmds; ++ }; ++}; ++ ++#define msg(_stream, _fmt, _args...) \ ++ do { \ ++ fprintf(_stream, _fmt "\n", ##_args); \ ++ fflush(_stream); \ ++ } while (0) ++ ++#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args) ++#define msg_err(_err, _fmt, _args...) \ ++ ({ \ ++ int32_t __msg_err = (_err); \ ++ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \ ++ ##_args); \ ++ -__msg_err; \ ++ }) ++ ++#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args) ++#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args) ++#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args) ++ ++#define out_ok(_args...) msg_out("OK " _args) ++#define out_err(_err) msg_out("ERR %d", _err) ++ ++#define ARG_END \ ++ { \ ++ ARG_TYPE_NONE \ ++ } ++ ++#define CMD_ARGS1(_x, _args...) \ ++ .args = (arg_t[]) { _args } ++#define CMD_ARGS(_args...) 
CMD_ARGS1(, ##_args, ARG_END)
++
++#define CMD_SUB(_cmds) .cmds = _cmds
++
++#define CMD_END \
++    { \
++        NULL, NULL, CMD_SUB(NULL) \
++    }
++
++#define ARG_VAL(_type) \
++    { \
++        ARG_TYPE_OBJ, .obj = {.type = _type } \
++    }
++#define ARG_NUM(_min, _max) \
++    { \
++        ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \
++    }
++#define ARG_STR(_size) \
++    { \
++        ARG_TYPE_STR, .str = {.size = _size } \
++    }
++
++extern command_t fd_commands[];
++
++#endif /* __TESTER_H__ */
+\ No newline at end of file
+diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
+index db84a22..db71cc0 100644
+--- a/tests/bugs/glusterfs/bug-873962-spb.t
++++ b/tests/bugs/glusterfs/bug-873962-spb.t
+@@ -14,6 +14,7 @@ TEST $CLI volume set $V0 performance.io-cache off
+ TEST $CLI volume set $V0 performance.write-behind off
+ TEST $CLI volume set $V0 performance.stat-prefetch off
+ TEST $CLI volume set $V0 performance.read-ahead off
++TEST $CLI volume set $V0 performance.open-behind off
+ TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+ TEST $CLI volume start $V0
+ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
+index 919eea3..76b5809 100644
+--- a/xlators/mount/fuse/src/fuse-bridge.c
++++ b/xlators/mount/fuse/src/fuse-bridge.c
+@@ -3398,6 +3398,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
+     gf_log("glusterfs-fuse", GF_LOG_TRACE,
+            "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
+ 
++    fd_close(state->fd);
++
+     fuse_fd_ctx_destroy(this, state->fd);
+     fd_unref(fd);
+ 
+diff --git a/xlators/performance/open-behind/src/open-behind-messages.h b/xlators/performance/open-behind/src/open-behind-messages.h
+index f250824..0e78917 100644
+--- a/xlators/performance/open-behind/src/open-behind-messages.h
++++ b/xlators/performance/open-behind/src/open-behind-messages.h
+@@ -23,6 +23,10 @@
+  */
+ 
+ GLFS_MSGID(OPEN_BEHIND, OPEN_BEHIND_MSG_XLATOR_CHILD_MISCONFIGURED,
+-           OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY);
++           OPEN_BEHIND_MSG_VOL_MISCONFIGURED, OPEN_BEHIND_MSG_NO_MEMORY,
++           OPEN_BEHIND_MSG_FAILED, OPEN_BEHIND_MSG_BAD_STATE);
++
++#define OPEN_BEHIND_MSG_FAILED_STR "Failed to submit fop"
++#define OPEN_BEHIND_MSG_BAD_STATE_STR "Unexpected state"
+ 
+ #endif /* _OPEN_BEHIND_MESSAGES_H_ */
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index cbe89ec..e43fe73 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -16,6 +16,18 @@
+ #include "open-behind-messages.h"
+ #include <glusterfs/glusterfs-acl.h>
+ 
++/* Note: The initial design of open-behind was made to cover the simple case
++ * of open, read, close for small files. This pattern combined with
++ * quick-read can do the whole operation without a single request to the
++ * bricks (except the initial lookup).
++ *
++ * The way to do this has been improved, but the logic remains the same.
++ * Basically, this means that any operation sent to the fd or the inode
++ * that is not a read causes the open request to be sent to the
++ * bricks, and all future operations will be executed synchronously,
++ * including opens (it's reset once all fd's are closed).
++ */
++
+ typedef struct ob_conf {
+     gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
+                                       e.g - fstat() readv()
+@@ -32,1096 +44,754 @@ typedef struct ob_conf {
+      */
+ } ob_conf_t;
+ 
+-typedef struct ob_inode {
+-    inode_t *inode;
+-    struct list_head resume_fops;
+-    struct list_head ob_fds;
+-    int count;
+-    int op_ret;
+-    int op_errno;
+-    gf_boolean_t open_in_progress;
+-    int unlinked;
+-} ob_inode_t;
++/* A negative state represents an errno value negated. In this case the
++ * current operation cannot be processed. */
++typedef enum _ob_state {
++    /* There are no opens on the inode or the first open is already
++     * completed. The current operation can be sent directly. */
++    OB_STATE_READY = 0,
+ 
+-typedef struct ob_fd {
+-    call_frame_t *open_frame;
+-    loc_t loc;
+-    dict_t *xdata;
+-    int flags;
+-    int op_errno;
+-    ob_inode_t *ob_inode;
+-    fd_t *fd;
+-    gf_boolean_t opened;
+-    gf_boolean_t ob_inode_fops_waiting;
+-    struct list_head list;
+-    struct list_head ob_fds_on_inode;
+-} ob_fd_t;
++    /* There's an open pending and it has been triggered. The current
++     * operation should be "stubbified" and processed with
++     * ob_stub_dispatch(). */
++    OB_STATE_OPEN_TRIGGERED,
+ 
+-ob_inode_t *
+-ob_inode_alloc(inode_t *inode)
+-{
+-    ob_inode_t *ob_inode = NULL;
++    /* There's an open pending but it has not been triggered. The current
++     * operation can be processed directly but using an anonymous fd. */
++    OB_STATE_OPEN_PENDING,
+ 
+-    ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t);
+-    if (ob_inode == NULL)
+-        goto out;
++    /* The current operation is the first open on the inode. */
++    OB_STATE_FIRST_OPEN
++} ob_state_t;
+ 
+-    ob_inode->inode = inode;
+-    INIT_LIST_HEAD(&ob_inode->resume_fops);
+-    INIT_LIST_HEAD(&ob_inode->ob_fds);
+-out:
+-    return ob_inode;
+-}
+-
+-void
+-ob_inode_free(ob_inode_t *ob_inode)
+-{
+-    if (ob_inode == NULL)
+-        goto out;
++typedef struct ob_inode {
++    /* List of stubs pending on the first open. Once the first open is
++     * complete, all these stubs will be resubmitted, and dependencies
++     * will be checked again. */
++    struct list_head resume_fops;
+ 
+-    list_del_init(&ob_inode->resume_fops);
+-    list_del_init(&ob_inode->ob_fds);
++    /* The inode this object references. */
++    inode_t *inode;
+ 
+-    GF_FREE(ob_inode);
+-out:
+-    return;
+-}
++    /* The fd from the first open sent to this inode. It will be set
++     * from the moment the open is processed until the open is fully
++     * executed, or until the fd is closed before being actually opened.
++     * It's NULL in all other cases. */
++    fd_t *first_fd;
++
++    /* The stub from the first open operation. When the open fop starts
++     * being processed, it's assigned the OB_OPEN_PREPARING value
++     * until the actual stub is created. This is necessary to avoid
++     * creating the stub inside a locked region. Once the stub is
++     * successfully created, it's assigned here. This value is set
++     * to NULL once the stub is resumed. */
++    call_stub_t *first_open;
++
++    /* The total number of currently open fd's on this inode. */
++    int32_t open_count;
++
++    /* This flag is set as soon as we know that the open will be
++     * sent to the bricks, even before the stub is ready. */
++    bool triggered;
++} ob_inode_t;
+ 
+-ob_inode_t *
+-ob_inode_get(xlator_t *this, inode_t *inode)
++/* Dummy pointer used temporarily while the actual open stub is being created */
++#define OB_OPEN_PREPARING ((call_stub_t *)-1)
++
++#define OB_POST_COMMON(_fop, _xl, _frame, _fd, _args...)
\ ++ case OB_STATE_FIRST_OPEN: \ ++ gf_smsg((_xl)->name, GF_LOG_ERROR, EINVAL, OPEN_BEHIND_MSG_BAD_STATE, \ ++ "fop=%s", #_fop, "state=%d", __ob_state, NULL); \ ++ default_##_fop##_failure_cbk(_frame, EINVAL); \ ++ break; \ ++ case OB_STATE_READY: \ ++ default_##_fop(_frame, _xl, ##_args); \ ++ break; \ ++ case OB_STATE_OPEN_TRIGGERED: { \ ++ call_stub_t *__ob_stub = fop_##_fop##_stub(_frame, ob_##_fop, \ ++ ##_args); \ ++ if (__ob_stub != NULL) { \ ++ ob_stub_dispatch(_xl, __ob_inode, _fd, __ob_stub); \ ++ break; \ ++ } \ ++ __ob_state = -ENOMEM; \ ++ } \ ++ default: \ ++ gf_smsg((_xl)->name, GF_LOG_ERROR, -__ob_state, \ ++ OPEN_BEHIND_MSG_FAILED, "fop=%s", #_fop, NULL); \ ++ default_##_fop##_failure_cbk(_frame, -__ob_state) ++ ++#define OB_POST_FD(_fop, _xl, _frame, _fd, _trigger, _args...) \ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_fd( \ ++ _xl, _fd, 0, true, _trigger, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ if (!(_trigger)) { \ ++ fd_t *__ob_fd = fd_anonymous_with_flags((_fd)->inode, \ ++ (_fd)->flags); \ ++ if (__ob_fd != NULL) { \ ++ default_##_fop(_frame, _xl, ##_args); \ ++ fd_unref(__ob_fd); \ ++ break; \ ++ } \ ++ __ob_state = -ENOMEM; \ ++ } \ ++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++#define OB_POST_FLUSH(_xl, _frame, _fd, _args...) \ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_fd( \ ++ _xl, _fd, 0, true, false, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ default_flush_cbk(_frame, NULL, _xl, 0, 0, NULL); \ ++ break; \ ++ OB_POST_COMMON(flush, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++#define OB_POST_INODE(_fop, _xl, _frame, _inode, _trigger, _args...) 
\ ++ do { \ ++ ob_inode_t *__ob_inode; \ ++ fd_t *__first_fd; \ ++ ob_state_t __ob_state = ob_open_and_resume_inode( \ ++ _xl, _inode, NULL, 0, true, _trigger, &__ob_inode, &__first_fd); \ ++ switch (__ob_state) { \ ++ case OB_STATE_OPEN_PENDING: \ ++ OB_POST_COMMON(_fop, _xl, _frame, __first_fd, ##_args); \ ++ } \ ++ } while (0) ++ ++static ob_inode_t * ++ob_inode_get_locked(xlator_t *this, inode_t *inode) + { + ob_inode_t *ob_inode = NULL; + uint64_t value = 0; +- int ret = 0; + +- if (!inode) +- goto out; ++ if ((__inode_ctx_get(inode, this, &value) == 0) && (value != 0)) { ++ return (ob_inode_t *)(uintptr_t)value; ++ } + +- LOCK(&inode->lock); +- { +- __inode_ctx_get(inode, this, &value); +- if (value == 0) { +- ob_inode = ob_inode_alloc(inode); +- if (ob_inode == NULL) +- goto unlock; +- +- value = (uint64_t)(uintptr_t)ob_inode; +- ret = __inode_ctx_set(inode, this, &value); +- if (ret < 0) { +- ob_inode_free(ob_inode); +- ob_inode = NULL; +- } +- } else { +- ob_inode = (ob_inode_t *)(uintptr_t)value; ++ ob_inode = GF_CALLOC(1, sizeof(*ob_inode), gf_ob_mt_inode_t); ++ if (ob_inode != NULL) { ++ ob_inode->inode = inode; ++ INIT_LIST_HEAD(&ob_inode->resume_fops); ++ ++ value = (uint64_t)(uintptr_t)ob_inode; ++ if (__inode_ctx_set(inode, this, &value) < 0) { ++ GF_FREE(ob_inode); ++ ob_inode = NULL; + } + } +-unlock: +- UNLOCK(&inode->lock); + +-out: + return ob_inode; + } + +-ob_fd_t * +-__ob_fd_ctx_get(xlator_t *this, fd_t *fd) ++static ob_state_t ++ob_open_and_resume_inode(xlator_t *xl, inode_t *inode, fd_t *fd, ++ int32_t open_count, bool synchronous, bool trigger, ++ ob_inode_t **pob_inode, fd_t **pfd) + { +- uint64_t value = 0; +- int ret = -1; +- ob_fd_t *ob_fd = NULL; ++ ob_conf_t *conf; ++ ob_inode_t *ob_inode; ++ call_stub_t *open_stub; + +- ret = __fd_ctx_get(fd, this, &value); +- if (ret) +- return NULL; ++ if (inode == NULL) { ++ return OB_STATE_READY; ++ } + +- ob_fd = (void *)((long)value); ++ conf = xl->private; + +- return ob_fd; +-} ++ *pfd = NULL; + +-ob_fd_t * +-ob_fd_ctx_get(xlator_t *this, fd_t *fd) +-{ +- ob_fd_t *ob_fd = NULL; +- +- LOCK(&fd->lock); ++ LOCK(&inode->lock); + { +- ob_fd = __ob_fd_ctx_get(this, fd); +- } +- UNLOCK(&fd->lock); +- +- return ob_fd; +-} ++ ob_inode = ob_inode_get_locked(xl, inode); ++ if (ob_inode == NULL) { ++ UNLOCK(&inode->lock); + +-int +-__ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +-{ +- uint64_t value = 0; +- int ret = -1; ++ return -ENOMEM; ++ } ++ *pob_inode = ob_inode; ++ ++ ob_inode->open_count += open_count; ++ ++ /* If first_fd is not NULL, it means that there's a previous open not ++ * yet completed. */ ++ if (ob_inode->first_fd != NULL) { ++ *pfd = ob_inode->first_fd; ++ /* If the current request doesn't trigger the open and it hasn't ++ * been triggered yet, we can continue without issuing the open ++ * only if the current request belongs to the same fd as the ++ * first one. */ ++ if (!trigger && !ob_inode->triggered && ++ (ob_inode->first_fd == fd)) { ++ UNLOCK(&inode->lock); ++ ++ return OB_STATE_OPEN_PENDING; ++ } + +- value = (long)((void *)ob_fd); ++ /* We need to issue the open. It could have already been triggered ++ * before. In this case open_stub will be NULL. Or the initial open ++ * may not be completely ready yet. In this case open_stub will be ++ * OB_OPEN_PREPARING. 
*/
++            open_stub = ob_inode->first_open;
++            ob_inode->first_open = NULL;
++            ob_inode->triggered = true;
+ 
+-    ret = __fd_ctx_set(fd, this, value);
++            UNLOCK(&inode->lock);
+ 
+-    return ret;
+-}
++            if ((open_stub != NULL) && (open_stub != OB_OPEN_PREPARING)) {
++                call_resume(open_stub);
++            }
+ 
+-int
+-ob_fd_ctx_set(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+-{
+-    int ret = -1;
++            return OB_STATE_OPEN_TRIGGERED;
++        }
+ 
+-    LOCK(&fd->lock);
+-    {
+-        ret = __ob_fd_ctx_set(this, fd, ob_fd);
+-    }
+-    UNLOCK(&fd->lock);
++        /* There's no pending open. Only opens can be non synchronous, so all
++         * regular fops will be processed directly. For non synchronous opens,
++         * we'll still process them normally (i.e. synchronously) if there are
++         * more file descriptors open. */
++        if (synchronous || (ob_inode->open_count > open_count)) {
++            UNLOCK(&inode->lock);
+ 
+-    return ret;
+-}
++            return OB_STATE_READY;
++        }
+ 
+-ob_fd_t *
+-ob_fd_new(void)
+-{
+-    ob_fd_t *ob_fd = NULL;
++        *pfd = fd;
+ 
+-    ob_fd = GF_CALLOC(1, sizeof(*ob_fd), gf_ob_mt_fd_t);
++        /* This is the first open. We keep a reference on the fd and set
++         * first_open stub to OB_OPEN_PREPARING until the actual stub can
++         * be assigned (we don't create the stub here to avoid doing memory
++         * allocations inside the mutex). */
++        ob_inode->first_fd = __fd_ref(fd);
++        ob_inode->first_open = OB_OPEN_PREPARING;
+ 
+-    INIT_LIST_HEAD(&ob_fd->list);
+-    INIT_LIST_HEAD(&ob_fd->ob_fds_on_inode);
++        /* If lazy_open is not set, we'll need to immediately send the open,
++         * so we set triggered right now. */
++        ob_inode->triggered = !conf->lazy_open;
++    }
++    UNLOCK(&inode->lock);
+ 
+-    return ob_fd;
++    return OB_STATE_FIRST_OPEN;
+ }
+ 
+-void
+-ob_fd_free(ob_fd_t *ob_fd)
++static ob_state_t
++ob_open_and_resume_fd(xlator_t *xl, fd_t *fd, int32_t open_count,
++                      bool synchronous, bool trigger, ob_inode_t **pob_inode,
++                      fd_t **pfd)
+ {
+-    LOCK(&ob_fd->fd->inode->lock);
+-    {
+-        list_del_init(&ob_fd->ob_fds_on_inode);
+-    }
+-    UNLOCK(&ob_fd->fd->inode->lock);
+-
+-    loc_wipe(&ob_fd->loc);
+-
+-    if (ob_fd->xdata)
+-        dict_unref(ob_fd->xdata);
++    uint64_t err;
+ 
+-    if (ob_fd->open_frame) {
+-        /* If we sill have a frame it means that background open has never
+-         * been triggered. We need to release the pending reference. */
+-        fd_unref(ob_fd->fd);
+-
+-        STACK_DESTROY(ob_fd->open_frame->root);
++    if ((fd_ctx_get(fd, xl, &err) == 0) && (err != 0)) {
++        return (ob_state_t)-err;
+     }
+ 
+-    GF_FREE(ob_fd);
++    return ob_open_and_resume_inode(xl, fd->inode, fd, open_count, synchronous,
++                                    trigger, pob_inode, pfd);
+ }
+ 
+-int
+-ob_wake_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+-            int op_errno, fd_t *fd_ret, dict_t *xdata)
++static ob_state_t
++ob_open_behind(xlator_t *xl, fd_t *fd, int32_t flags, ob_inode_t **pob_inode,
++               fd_t **pfd)
+ {
+-    fd_t *fd = NULL;
+-    int count = 0;
+-    int ob_inode_op_ret = 0;
+-    int ob_inode_op_errno = 0;
+-    ob_fd_t *ob_fd = NULL;
+-    call_stub_t *stub = NULL, *tmp = NULL;
+-    ob_inode_t *ob_inode = NULL;
+-    gf_boolean_t ob_inode_fops_waiting = _gf_false;
+-    struct list_head fops_waiting_on_fd, fops_waiting_on_inode;
++    bool synchronous;
+ 
+-    fd = frame->local;
+-    frame->local = NULL;
+-
+-    INIT_LIST_HEAD(&fops_waiting_on_fd);
+-    INIT_LIST_HEAD(&fops_waiting_on_inode);
++    /* TODO: If O_CREAT, O_APPEND, O_WRONLY or O_DIRECT are specified, shouldn't
++     * we also execute this open synchronously ? 
*/ ++ synchronous = (flags & O_TRUNC) != 0; + +- ob_inode = ob_inode_get(this, fd->inode); ++ return ob_open_and_resume_fd(xl, fd, 1, synchronous, true, pob_inode, pfd); ++} + +- LOCK(&fd->lock); ++static int32_t ++ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, ++ call_stub_t *stub) ++{ ++ LOCK(&ob_inode->inode->lock); + { +- ob_fd = __ob_fd_ctx_get(this, fd); +- ob_fd->opened = _gf_true; +- +- ob_inode_fops_waiting = ob_fd->ob_inode_fops_waiting; +- +- list_splice_init(&ob_fd->list, &fops_waiting_on_fd); +- +- if (op_ret < 0) { +- /* mark fd BAD for ever */ +- ob_fd->op_errno = op_errno; +- ob_fd = NULL; /*shouldn't be freed*/ +- } else { +- __fd_ctx_del(fd, this, NULL); +- } +- } +- UNLOCK(&fd->lock); +- +- if (ob_inode_fops_waiting) { +- LOCK(&fd->inode->lock); +- { +- count = --ob_inode->count; +- if (op_ret < 0) { +- /* TODO: when to reset the error? */ +- ob_inode->op_ret = -1; +- ob_inode->op_errno = op_errno; +- } +- +- if (count == 0) { +- ob_inode->open_in_progress = _gf_false; +- ob_inode_op_ret = ob_inode->op_ret; +- ob_inode_op_errno = ob_inode->op_errno; +- list_splice_init(&ob_inode->resume_fops, +- &fops_waiting_on_inode); +- } ++ /* We only queue a stub if the open has not been completed or ++ * cancelled. */ ++ if (ob_inode->first_fd == fd) { ++ list_add_tail(&stub->list, &ob_inode->resume_fops); ++ stub = NULL; + } +- UNLOCK(&fd->inode->lock); +- } +- +- if (ob_fd) +- ob_fd_free(ob_fd); +- +- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_fd, list) +- { +- list_del_init(&stub->list); +- +- if (op_ret < 0) +- call_unwind_error(stub, -1, op_errno); +- else +- call_resume(stub); + } ++ UNLOCK(&ob_inode->inode->lock); + +- list_for_each_entry_safe(stub, tmp, &fops_waiting_on_inode, list) +- { +- list_del_init(&stub->list); +- +- if (ob_inode_op_ret < 0) +- call_unwind_error(stub, -1, ob_inode_op_errno); +- else +- call_resume(stub); ++ if (stub != NULL) { ++ call_resume(stub); + } + +- /* The background open is completed. We can release the 'fd' reference. */ +- fd_unref(fd); +- +- STACK_DESTROY(frame->root); +- + return 0; + } + +-int +-ob_fd_wake(xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) ++static int32_t ++ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, ++ call_stub_t *stub) + { +- call_frame_t *frame = NULL; +- +- if (ob_fd == NULL) { +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) +- goto unlock; ++ bool closed; + +- frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; +- } +- unlock: +- UNLOCK(&fd->lock); +- } else { +- LOCK(&fd->lock); +- { +- frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; ++ LOCK(&ob_inode->inode->lock); ++ { ++ closed = ob_inode->first_fd != fd; ++ if (!closed) { ++ if (ob_inode->triggered) { ++ ob_inode->first_open = NULL; ++ } else { ++ ob_inode->first_open = stub; ++ stub = NULL; ++ } + } +- UNLOCK(&fd->lock); + } ++ UNLOCK(&ob_inode->inode->lock); + +- if (frame) { +- /* We don't need to take a reference here. We already have a reference +- * while the open is pending. 
*/ +- frame->local = fd; +- +- STACK_WIND(frame, ob_wake_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, &ob_fd->loc, ob_fd->flags, fd, +- ob_fd->xdata); ++ if (stub != NULL) { ++ if (closed) { ++ call_stub_destroy(stub); ++ fd_unref(fd); ++ } else { ++ call_resume(stub); ++ } + } + + return 0; + } + +-void +-ob_inode_wake(xlator_t *this, struct list_head *ob_fds) ++static void ++ob_resume_pending(struct list_head *list) + { +- ob_fd_t *ob_fd = NULL, *tmp = NULL; ++ call_stub_t *stub; + +- if (!list_empty(ob_fds)) { +- list_for_each_entry_safe(ob_fd, tmp, ob_fds, ob_fds_on_inode) +- { +- ob_fd_wake(this, ob_fd->fd, ob_fd); +- ob_fd_free(ob_fd); +- } +- } +-} ++ while (!list_empty(list)) { ++ stub = list_first_entry(list, call_stub_t, list); ++ list_del_init(&stub->list); + +-/* called holding inode->lock and fd->lock */ +-void +-ob_fd_copy(ob_fd_t *src, ob_fd_t *dst) +-{ +- if (!src || !dst) +- goto out; +- +- dst->fd = src->fd; +- dst->loc.inode = inode_ref(src->loc.inode); +- gf_uuid_copy(dst->loc.gfid, src->loc.gfid); +- dst->flags = src->flags; +- dst->xdata = dict_ref(src->xdata); +- dst->ob_inode = src->ob_inode; +-out: +- return; ++ call_resume(stub); ++ } + } + +-int +-open_all_pending_fds_and_resume(xlator_t *this, inode_t *inode, +- call_stub_t *stub) ++static void ++ob_open_completed(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, int32_t op_ret, ++ int32_t op_errno) + { +- ob_inode_t *ob_inode = NULL; +- ob_fd_t *ob_fd = NULL, *tmp = NULL; +- gf_boolean_t was_open_in_progress = _gf_false; +- gf_boolean_t wait_for_open = _gf_false; +- struct list_head ob_fds; ++ struct list_head list; + +- ob_inode = ob_inode_get(this, inode); +- if (ob_inode == NULL) +- goto out; ++ INIT_LIST_HEAD(&list); + +- INIT_LIST_HEAD(&ob_fds); ++ if (op_ret < 0) { ++ fd_ctx_set(fd, xl, op_errno <= 0 ? EIO : op_errno); ++ } + +- LOCK(&inode->lock); ++ LOCK(&ob_inode->inode->lock); + { +- was_open_in_progress = ob_inode->open_in_progress; +- ob_inode->unlinked = 1; +- +- if (was_open_in_progress) { +- list_add_tail(&stub->list, &ob_inode->resume_fops); +- goto inode_unlock; +- } +- +- list_for_each_entry(ob_fd, &ob_inode->ob_fds, ob_fds_on_inode) +- { +- LOCK(&ob_fd->fd->lock); +- { +- if (ob_fd->opened) +- goto fd_unlock; +- +- ob_inode->count++; +- ob_fd->ob_inode_fops_waiting = _gf_true; +- +- if (ob_fd->open_frame == NULL) { +- /* open in progress no need of wake */ +- } else { +- tmp = ob_fd_new(); +- tmp->open_frame = ob_fd->open_frame; +- ob_fd->open_frame = NULL; +- +- ob_fd_copy(ob_fd, tmp); +- list_add_tail(&tmp->ob_fds_on_inode, &ob_fds); +- } +- } +- fd_unlock: +- UNLOCK(&ob_fd->fd->lock); +- } +- +- if (ob_inode->count) { +- wait_for_open = ob_inode->open_in_progress = _gf_true; +- list_add_tail(&stub->list, &ob_inode->resume_fops); ++ /* Only update the fields if the file has not been closed before ++ * getting here. 
*/ ++ if (ob_inode->first_fd == fd) { ++ list_splice_init(&ob_inode->resume_fops, &list); ++ ob_inode->first_fd = NULL; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = false; + } + } +-inode_unlock: +- UNLOCK(&inode->lock); ++ UNLOCK(&ob_inode->inode->lock); + +-out: +- if (!was_open_in_progress) { +- if (!wait_for_open) { +- call_resume(stub); +- } else { +- ob_inode_wake(this, &ob_fds); +- } +- } ++ ob_resume_pending(&list); + +- return 0; ++ fd_unref(fd); + } + +-int +-open_and_resume(xlator_t *this, fd_t *fd, call_stub_t *stub) ++static int32_t ++ob_open_cbk(call_frame_t *frame, void *cookie, xlator_t *xl, int32_t op_ret, ++ int32_t op_errno, fd_t *fd, dict_t *xdata) + { +- ob_fd_t *ob_fd = NULL; +- int op_errno = 0; +- +- if (!fd) +- goto nofd; +- +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) +- goto unlock; ++ ob_inode_t *ob_inode; + +- if (ob_fd->op_errno) { +- op_errno = ob_fd->op_errno; +- goto unlock; +- } ++ ob_inode = frame->local; ++ frame->local = NULL; + +- list_add_tail(&stub->list, &ob_fd->list); +- } +-unlock: +- UNLOCK(&fd->lock); ++ ob_open_completed(xl, ob_inode, cookie, op_ret, op_errno); + +-nofd: +- if (op_errno) +- call_unwind_error(stub, -1, op_errno); +- else if (ob_fd) +- ob_fd_wake(this, fd, NULL); +- else +- call_resume(stub); ++ STACK_DESTROY(frame->root); + + return 0; + } + +-int +-ob_open_behind(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, ++static int32_t ++ob_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) + { +- ob_fd_t *ob_fd = NULL; +- int ret = -1; +- ob_conf_t *conf = NULL; +- ob_inode_t *ob_inode = NULL; +- gf_boolean_t open_in_progress = _gf_false; +- int unlinked = 0; +- +- conf = this->private; +- +- if (flags & O_TRUNC) { +- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- return 0; +- } +- +- ob_inode = ob_inode_get(this, fd->inode); +- +- ob_fd = ob_fd_new(); +- if (!ob_fd) +- goto enomem; +- +- ob_fd->ob_inode = ob_inode; +- +- ob_fd->fd = fd; +- +- ob_fd->open_frame = copy_frame(frame); +- if (!ob_fd->open_frame) +- goto enomem; +- ret = loc_copy(&ob_fd->loc, loc); +- if (ret) +- goto enomem; +- +- ob_fd->flags = flags; +- if (xdata) +- ob_fd->xdata = dict_ref(xdata); +- +- LOCK(&fd->inode->lock); +- { +- open_in_progress = ob_inode->open_in_progress; +- unlinked = ob_inode->unlinked; +- if (!open_in_progress && !unlinked) { +- ret = ob_fd_ctx_set(this, fd, ob_fd); +- if (ret) { +- UNLOCK(&fd->inode->lock); +- goto enomem; +- } +- +- list_add(&ob_fd->ob_fds_on_inode, &ob_inode->ob_fds); +- } +- } +- UNLOCK(&fd->inode->lock); +- +- /* We take a reference while the background open is pending or being +- * processed. If we finally wind the request in the foreground, then +- * ob_fd_free() will take care of this additional reference. 
*/ +- fd_ref(fd); +- +- if (!open_in_progress && !unlinked) { +- STACK_UNWIND_STRICT(open, frame, 0, 0, fd, xdata); +- +- if (!conf->lazy_open) +- ob_fd_wake(this, fd, NULL); +- } else { +- ob_fd_free(ob_fd); +- STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), +- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); +- } ++ STACK_WIND_COOKIE(frame, ob_open_cbk, fd, FIRST_CHILD(this), ++ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + + return 0; +-enomem: +- if (ob_fd) { +- if (ob_fd->open_frame) +- STACK_DESTROY(ob_fd->open_frame->root); +- +- loc_wipe(&ob_fd->loc); +- if (ob_fd->xdata) +- dict_unref(ob_fd->xdata); +- +- GF_FREE(ob_fd); +- } +- +- return -1; + } + +-int ++static int32_t + ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) + { +- fd_t *old_fd = NULL; +- int ret = -1; +- int op_errno = ENOMEM; +- call_stub_t *stub = NULL; +- +- old_fd = fd_lookup(fd->inode, 0); +- if (old_fd) { +- /* open-behind only when this is the first FD */ +- stub = fop_open_stub(frame, default_open_resume, loc, flags, fd, xdata); +- if (!stub) { +- fd_unref(old_fd); +- goto err; +- } +- +- open_and_resume(this, old_fd, stub); ++ ob_inode_t *ob_inode; ++ call_frame_t *open_frame; ++ call_stub_t *stub; ++ fd_t *first_fd; ++ ob_state_t state; ++ ++ state = ob_open_behind(this, fd, flags, &ob_inode, &first_fd); ++ if (state == OB_STATE_READY) { ++ /* There's no pending open, but there are other file descriptors opened ++ * or the current flags require a synchronous open. */ ++ return default_open(frame, this, loc, flags, fd, xdata); ++ } + +- fd_unref(old_fd); ++ if (state == OB_STATE_OPEN_TRIGGERED) { ++ /* The first open is in progress (either because it was already issued ++ * or because this request triggered it). We try to create a new stub ++ * to retry the operation once the initial open completes. */ ++ stub = fop_open_stub(frame, ob_open, loc, flags, fd, xdata); ++ if (stub != NULL) { ++ return ob_stub_dispatch(this, ob_inode, first_fd, stub); ++ } + +- return 0; ++ state = -ENOMEM; + } + +- ret = ob_open_behind(frame, this, loc, flags, fd, xdata); +- if (ret) { +- goto err; +- } ++ if (state == OB_STATE_FIRST_OPEN) { ++ /* We try to create a stub for the new open. A new frame needs to be ++ * used because the current one may be destroyed soon after sending ++ * the open's reply. */ ++ open_frame = copy_frame(frame); ++ if (open_frame != NULL) { ++ stub = fop_open_stub(open_frame, ob_open_resume, loc, flags, fd, ++ xdata); ++ if (stub != NULL) { ++ open_frame->local = ob_inode; + +- return 0; +-err: +- gf_msg(this->name, GF_LOG_ERROR, op_errno, OPEN_BEHIND_MSG_NO_MEMORY, "%s", +- loc->path); ++ /* TODO: Previous version passed xdata back to the caller, but ++ * probably this doesn't make sense since it won't contain ++ * any requested data. I think it would be better to pass ++ * NULL for xdata. */ ++ default_open_cbk(frame, NULL, this, 0, 0, fd, xdata); + +- STACK_UNWIND_STRICT(open, frame, -1, op_errno, 0, 0); ++ return ob_open_dispatch(this, ob_inode, first_fd, stub); ++ } + +- return 0; +-} ++ STACK_DESTROY(open_frame->root); ++ } + +-fd_t * +-ob_get_wind_fd(xlator_t *this, fd_t *fd, uint32_t *flag) +-{ +- fd_t *wind_fd = NULL; +- ob_fd_t *ob_fd = NULL; +- ob_conf_t *conf = NULL; ++ /* In case of error, simulate a regular completion but with an error ++ * code. 
*/ ++ ob_open_completed(this, ob_inode, first_fd, -1, ENOMEM); + +- conf = this->private; ++ state = -ENOMEM; ++ } + +- ob_fd = ob_fd_ctx_get(this, fd); ++ /* In case of failure we need to decrement the number of open files because ++ * ob_fdclose() won't be called. */ + +- if (ob_fd && ob_fd->open_frame && conf->use_anonymous_fd) { +- wind_fd = fd_anonymous(fd->inode); +- if ((ob_fd->flags & O_DIRECT) && (flag)) +- *flag = *flag | O_DIRECT; +- } else { +- wind_fd = fd_ref(fd); ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode->open_count--; + } ++ UNLOCK(&fd->inode->lock); + +- return wind_fd; ++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", ++ "open", "path=%s", loc->path, NULL); ++ ++ return default_open_failure_cbk(frame, -state); + } + +-int ++static int32_t + ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- ob_conf_t *conf = NULL; ++ ob_conf_t *conf = this->private; ++ bool trigger = conf->read_after_open || !conf->use_anonymous_fd; + +- conf = this->private; +- +- if (!conf->read_after_open) +- wind_fd = ob_get_wind_fd(this, fd, &flags); +- else +- wind_fd = fd_ref(fd); +- +- stub = fop_readv_stub(frame, default_readv_resume, wind_fd, size, offset, +- flags, xdata); +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); ++ OB_POST_FD(readv, this, frame, fd, trigger, fd, size, offset, flags, xdata); + + return 0; + } + +-int ++static int32_t + ob_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_writev_stub(frame, default_writev_resume, fd, iov, count, offset, +- flags, iobref, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(writev, this, frame, fd, true, fd, iov, count, offset, flags, ++ iobref, xdata); + + return 0; + } + +-int ++static int32_t + ob_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- +- wind_fd = ob_get_wind_fd(this, fd, NULL); +- +- stub = fop_fstat_stub(frame, default_fstat_resume, wind_fd, xdata); ++ ob_conf_t *conf = this->private; ++ bool trigger = !conf->use_anonymous_fd; + +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fstat, this, frame, fd, trigger, fd, xdata); + + return 0; + } + +-int ++static int32_t + ob_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- fd_t *wind_fd = NULL; +- +- wind_fd = ob_get_wind_fd(this, fd, NULL); ++ ob_conf_t *conf = this->private; ++ bool trigger = !conf->use_anonymous_fd; + +- stub = fop_seek_stub(frame, default_seek_resume, wind_fd, offset, what, +- xdata); +- +- fd_unref(wind_fd); +- +- if (!stub) +- goto err; +- +- open_and_resume(this, wind_fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(seek, this, frame, fd, trigger, fd, offset, what, xdata); + + return 0; + } + +-int ++static int32_t + ob_flush(call_frame_t 
*frame, xlator_t *this, fd_t *fd, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- ob_fd_t *ob_fd = NULL; +- gf_boolean_t unwind = _gf_false; +- +- LOCK(&fd->lock); +- { +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (ob_fd && ob_fd->open_frame) +- /* if open() was never wound to backend, +- no need to wind flush() either. +- */ +- unwind = _gf_true; +- } +- UNLOCK(&fd->lock); +- +- if (unwind) +- goto unwind; +- +- stub = fop_flush_stub(frame, default_flush_resume, fd, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, 0); +- +- return 0; +- +-unwind: +- STACK_UNWIND_STRICT(flush, frame, 0, 0, 0); ++ OB_POST_FLUSH(this, frame, fd, fd, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsync_stub(frame, default_fsync_resume, fd, flag, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(fsync, this, frame, fd, true, fd, flag, xdata); + + return 0; + } + +-int ++static int32_t + ob_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, flock, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(lk, this, frame, fd, true, fd, cmd, flock, xdata); + + return 0; + } + +-int ++static int32_t + ob_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(ftruncate, this, frame, fd, true, fd, offset, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, xattr, flags, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fsetxattr, this, frame, fd, true, fd, xattr, flags, xdata); + + return 0; + } + +-int ++static int32_t + ob_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fgetxattr, this, frame, fd, true, fd, name, xdata); + + return 0; + } + +-int ++static int32_t + ob_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fremovexattr, this, frame, 
fd, true, fd, name, xdata); + + return 0; + } + +-int ++static int32_t + ob_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) + { +- call_stub_t *stub = fop_finodelk_stub(frame, default_finodelk_resume, +- volume, fd, cmd, flock, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, 0); ++ OB_POST_FD(finodelk, this, frame, fd, true, volume, fd, cmd, flock, xdata); + + return 0; + } + +-int ++static int32_t + ob_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) + { +- call_stub_t *stub = fop_fentrylk_stub( +- frame, default_fentrylk_resume, volume, fd, basename, cmd, type, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, 0); ++ OB_POST_FD(fentrylk, this, frame, fd, true, volume, fd, basename, cmd, type, ++ xdata); + + return 0; + } + +-int ++static int32_t + ob_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) + { +- call_stub_t *stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, +- optype, xattr, xdata); +- if (stub) +- open_and_resume(this, fd, stub); +- else +- STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, 0, 0); ++ OB_POST_FD(fxattrop, this, frame, fd, true, fd, optype, xattr, xdata); + + return 0; + } + +-int ++static int32_t + ob_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *iatt, + int valid, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, iatt, valid, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_FD(fsetattr, this, frame, fd, true, fd, iatt, valid, xdata); + + return 0; + } + +-int ++static int32_t + ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset, +- len, xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); ++ OB_POST_FD(fallocate, this, frame, fd, true, fd, mode, offset, len, xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); +- return 0; + } + +-int ++static int32_t + ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, +- xdata); +- if (!stub) +- goto err; +- +- open_and_resume(this, fd, stub); ++ OB_POST_FD(discard, this, frame, fd, true, fd, offset, len, xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); +- return 0; + } + +-int ++static int32_t + ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) + { +- call_stub_t *stub; +- +- stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, offset, len, +- xdata); +- if (!stub) +- goto err; ++ OB_POST_FD(zerofill, this, frame, fd, true, fd, offset, len, xdata); + +- open_and_resume(this, fd, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + +-int ++static int32_t + 
ob_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflags, xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, loc->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, 0, 0, 0); ++ OB_POST_INODE(unlink, this, frame, loc->inode, true, loc, xflags, xdata); + + return 0; + } + +-int ++static int32_t + ob_rename(call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, + dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_rename_stub(frame, default_rename_resume, src, dst, xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, dst->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); ++ OB_POST_INODE(rename, this, frame, dst->inode, true, src, dst, xdata); + + return 0; + } + +-int32_t ++static int32_t + ob_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- +- stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid, +- xdata); +- if (!stub) +- goto err; ++ OB_POST_INODE(setattr, this, frame, loc->inode, true, loc, stbuf, valid, ++ xdata); + +- open_all_pending_fds_and_resume(this, loc->inode, stub); +- +- return 0; +-err: +- STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + +-int32_t ++static int32_t + ob_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) + { +- call_stub_t *stub = NULL; +- gf_boolean_t access_xattr = _gf_false; +- + if (dict_get(dict, POSIX_ACL_DEFAULT_XATTR) || + dict_get(dict, POSIX_ACL_ACCESS_XATTR) || +- dict_get(dict, GF_SELINUX_XATTR_KEY)) +- access_xattr = _gf_true; +- +- if (!access_xattr) ++ dict_get(dict, GF_SELINUX_XATTR_KEY)) { + return default_setxattr(frame, this, loc, dict, flags, xdata); ++ } + +- stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags, +- xdata); +- if (!stub) +- goto err; +- +- open_all_pending_fds_and_resume(this, loc->inode, stub); ++ OB_POST_INODE(setxattr, this, frame, loc->inode, true, loc, dict, flags, ++ xdata); + + return 0; +-err: +- STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL); +- return 0; + } + +-int +-ob_release(xlator_t *this, fd_t *fd) ++static void ++ob_fdclose(xlator_t *this, fd_t *fd) + { +- ob_fd_t *ob_fd = NULL; ++ struct list_head list; ++ ob_inode_t *ob_inode; ++ call_stub_t *stub; ++ ++ INIT_LIST_HEAD(&list); ++ stub = NULL; + +- ob_fd = ob_fd_ctx_get(this, fd); ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode = ob_inode_get_locked(this, fd->inode); ++ if (ob_inode != NULL) { ++ ob_inode->open_count--; ++ ++ /* If this fd is the same as ob_inode->first_fd, it means that ++ * the initial open has not fully completed. We'll try to cancel ++ * it. */ ++ if (ob_inode->first_fd == fd) { ++ if (ob_inode->first_open == OB_OPEN_PREPARING) { ++ /* In this case ob_open_dispatch() has not been called yet. ++ * We clear first_fd and first_open to allow that function ++ * to know that the open is not really needed. This also ++ * allows other requests to work as expected if they ++ * arrive before the dispatch function is called. If there ++ * are pending fops, we can directly process them here. ++ * (note that there shouldn't be any fd related fops, but ++ * if there are, it's fine if they fail). 
*/ ++ ob_inode->first_fd = NULL; ++ ob_inode->first_open = NULL; ++ ob_inode->triggered = false; ++ list_splice_init(&ob_inode->resume_fops, &list); ++ } else if (!ob_inode->triggered) { ++ /* If the open has already been dispatched, we can only ++ * cancel it if it has not been triggered. Otherwise we ++ * simply wait until it completes. While it's not triggered, ++ * first_open must be a valid stub and there can't be any ++ * pending fops. */ ++ GF_ASSERT((ob_inode->first_open != NULL) && ++ list_empty(&ob_inode->resume_fops)); ++ ++ ob_inode->first_fd = NULL; ++ stub = ob_inode->first_open; ++ ob_inode->first_open = NULL; ++ } ++ } ++ } ++ } ++ UNLOCK(&fd->inode->lock); + +- ob_fd_free(ob_fd); ++ if (stub != NULL) { ++ call_stub_destroy(stub); ++ fd_unref(fd); ++ } + +- return 0; ++ ob_resume_pending(&list); + } + + int + ob_forget(xlator_t *this, inode_t *inode) + { +- ob_inode_t *ob_inode = NULL; ++ ob_inode_t *ob_inode; + uint64_t value = 0; + +- inode_ctx_del(inode, this, &value); +- +- if (value) { ++ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) { + ob_inode = (ob_inode_t *)(uintptr_t)value; +- ob_inode_free(ob_inode); ++ GF_FREE(ob_inode); + } + + return 0; +@@ -1153,20 +823,18 @@ ob_priv_dump(xlator_t *this) + int + ob_fdctx_dump(xlator_t *this, fd_t *fd) + { +- ob_fd_t *ob_fd = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; +- int ret = 0; ++ uint64_t value = 0; ++ int ret = 0, error = 0; + + ret = TRY_LOCK(&fd->lock); + if (ret) + return 0; + +- ob_fd = __ob_fd_ctx_get(this, fd); +- if (!ob_fd) { +- UNLOCK(&fd->lock); +- return 0; ++ if ((__fd_ctx_get(fd, this, &value) == 0) && (value != 0)) { ++ error = (int32_t)value; + } + + gf_proc_dump_build_key(key_prefix, "xlator.performance.open-behind", +@@ -1175,17 +843,7 @@ ob_fdctx_dump(xlator_t *this, fd_t *fd) + + gf_proc_dump_write("fd", "%p", fd); + +- gf_proc_dump_write("open_frame", "%p", ob_fd->open_frame); +- +- if (ob_fd->open_frame) +- gf_proc_dump_write("open_frame.root.unique", "%" PRIu64, +- ob_fd->open_frame->root->unique); +- +- gf_proc_dump_write("loc.path", "%s", ob_fd->loc.path); +- +- gf_proc_dump_write("loc.ino", "%s", uuid_utoa(ob_fd->loc.gfid)); +- +- gf_proc_dump_write("flags", "%d", ob_fd->flags); ++ gf_proc_dump_write("error", "%d", error); + + UNLOCK(&fd->lock); + +@@ -1307,7 +965,7 @@ struct xlator_fops fops = { + }; + + struct xlator_cbks cbks = { +- .release = ob_release, ++ .fdclose = ob_fdclose, + .forget = ob_forget, + }; + +-- +1.8.3.1 + diff --git a/SOURCES/0524-open-behind-fix-call_frame-leak.patch b/SOURCES/0524-open-behind-fix-call_frame-leak.patch new file mode 100644 index 0000000..75a243d --- /dev/null +++ b/SOURCES/0524-open-behind-fix-call_frame-leak.patch @@ -0,0 +1,70 @@ +From 36dddf59a02d91d3db5b124be626ab6bc235ed5a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Wed, 19 Aug 2020 23:27:38 +0200 +Subject: [PATCH 524/526] open-behind: fix call_frame leak + +When an open was delayed, a copy of the frame was created because the +current frame was used to unwind the "fake" open. When the open was +actually sent, the frame was correctly destroyed. However if the file +was closed before needing to send the open, the frame was not destroyed. + +This patch correctly destroys the frame in all cases. 
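+
+For clarity, the teardown now funnels through a single helper (a condensed
+copy of the ob_open_destroy() hunk below, not extra code): both the dispatch
+path and the close path call it, so the copied frame is released on every
+path.
+
+    static void
+    ob_open_destroy(call_stub_t *stub, fd_t *fd)
+    {
+        STACK_DESTROY(stub->frame->root); /* frame copied for the delayed open */
+        call_stub_destroy(stub);          /* stub carrying the pending open */
+        fd_unref(fd);                     /* reference held since the fake open */
+    }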
+
+
+Upstream patch:
+> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24892
+> Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926
+> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+> Fixes: #1440
+
+BUG: 1830713
+Change-Id: I8c00fc7f15545c240e8151305d9e4cf06d653926
+Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/224488
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ xlators/performance/open-behind/src/open-behind.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
+index e43fe73..1ab635e 100644
+--- a/xlators/performance/open-behind/src/open-behind.c
++++ b/xlators/performance/open-behind/src/open-behind.c
+@@ -333,6 +333,14 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+     return 0;
+ }
+ 
++static void
++ob_open_destroy(call_stub_t *stub, fd_t *fd)
++{
++    STACK_DESTROY(stub->frame->root);
++    call_stub_destroy(stub);
++    fd_unref(fd);
++}
++
+ static int32_t
+ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+                  call_stub_t *stub)
+@@ -355,8 +363,7 @@ ob_open_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd,
+ 
+     if (stub != NULL) {
+         if (closed) {
+-            call_stub_destroy(stub);
+-            fd_unref(fd);
++            ob_open_destroy(stub, fd);
+         } else {
+             call_resume(stub);
+         }
+@@ -776,8 +783,7 @@ ob_fdclose(xlator_t *this, fd_t *fd)
+     UNLOCK(&fd->inode->lock);
+ 
+     if (stub != NULL) {
+-        call_stub_destroy(stub);
+-        fd_unref(fd);
++        ob_open_destroy(stub, fd);
+     }
+ 
+     ob_resume_pending(&list);
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0525-open-behind-implement-create-fop.patch b/SOURCES/0525-open-behind-implement-create-fop.patch
new file mode 100644
index 0000000..c7a5329
--- /dev/null
+++ b/SOURCES/0525-open-behind-implement-create-fop.patch
@@ -0,0 +1,109 @@
+From 41aae052b5e3afe64d3e0668643726bab0e77265 Mon Sep 17 00:00:00 2001
+From: Xavi Hernandez <xhernandez@redhat.com>
+Date: Fri, 4 Sep 2020 14:49:50 +0200
+Subject: [PATCH 525/526] open-behind: implement create fop
+
+Open-behind didn't implement the create fop. This caused created files
+not to be accounted for in the number of open fd's. This could cause
+future opens to be delayed when they shouldn't be.
+
+This patch implements the create fop. It also fixes a problem when
+destroying the stack: when frame->local was not NULL, STACK_DESTROY()
+tried to mem_put() it, which is not correct. 
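+
+Sketched from the ob_open_destroy() hunk below, this is the ordering that
+avoids the bogus mem_put() (frame->local holds the ob_inode_t set by
+ob_open(), which is not memory that STACK_DESTROY() may mem_put()):
+
+    stub->frame->local = NULL;        /* detach before destroying the stack */
+    STACK_DESTROY(stub->frame->root); /* would mem_put() a non-NULL local */
+    call_stub_destroy(stub);
+    fd_unref(fd);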
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24953 +> Fixes: #1440 +> Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1830713 +Change-Id: Ic982bad07d4af30b915d7eb1fbcef7a847a45869 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/224489 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/performance/open-behind/src/open-behind.c | 52 +++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c +index 1ab635e..600c3b6 100644 +--- a/xlators/performance/open-behind/src/open-behind.c ++++ b/xlators/performance/open-behind/src/open-behind.c +@@ -336,6 +336,7 @@ ob_stub_dispatch(xlator_t *xl, ob_inode_t *ob_inode, fd_t *fd, + static void + ob_open_destroy(call_stub_t *stub, fd_t *fd) + { ++ stub->frame->local = NULL; + STACK_DESTROY(stub->frame->root); + call_stub_destroy(stub); + fd_unref(fd); +@@ -516,6 +517,56 @@ ob_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + } + + static int32_t ++ob_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, ++ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) ++{ ++ ob_inode_t *ob_inode; ++ call_stub_t *stub; ++ fd_t *first_fd; ++ ob_state_t state; ++ ++ /* Create requests are never delayed. We always send them synchronously. */ ++ state = ob_open_and_resume_fd(this, fd, 1, true, true, &ob_inode, ++ &first_fd); ++ if (state == OB_STATE_READY) { ++ /* There's no pending open, but there are other file descriptors opened ++ * so we simply forward the request synchronously. */ ++ return default_create(frame, this, loc, flags, mode, umask, fd, xdata); ++ } ++ ++ if (state == OB_STATE_OPEN_TRIGGERED) { ++ /* The first open is in progress (either because it was already issued ++ * or because this request triggered it). We try to create a new stub ++ * to retry the operation once the initial open completes. */ ++ stub = fop_create_stub(frame, ob_create, loc, flags, mode, umask, fd, ++ xdata); ++ if (stub != NULL) { ++ return ob_stub_dispatch(this, ob_inode, first_fd, stub); ++ } ++ ++ state = -ENOMEM; ++ } ++ ++ /* Since we forced a synchronous request, OB_STATE_FIRST_OPEN will never ++ * be returned by ob_open_and_resume_fd(). If we are here it can only be ++ * because there has been a problem. */ ++ ++ /* In case of failure we need to decrement the number of open files because ++ * ob_fdclose() won't be called. 
*/ ++ ++ LOCK(&fd->inode->lock); ++ { ++ ob_inode->open_count--; ++ } ++ UNLOCK(&fd->inode->lock); ++ ++ gf_smsg(this->name, GF_LOG_ERROR, -state, OPEN_BEHIND_MSG_FAILED, "fop=%s", ++ "create", "path=%s", loc->path, NULL); ++ ++ return default_create_failure_cbk(frame, -state); ++} ++ ++static int32_t + ob_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) + { +@@ -946,6 +997,7 @@ fini(xlator_t *this) + + struct xlator_fops fops = { + .open = ob_open, ++ .create = ob_create, + .readv = ob_readv, + .writev = ob_writev, + .flush = ob_flush, +-- +1.8.3.1 + diff --git a/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch b/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch new file mode 100644 index 0000000..fb74fd8 --- /dev/null +++ b/SOURCES/0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch @@ -0,0 +1,44 @@ +From baeca3c9b70548463ceea0ae27e6f98cf06e96b7 Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar <ssivakum@redhat.com> +Date: Tue, 28 Jul 2020 22:27:34 +0530 +Subject: [PATCH 526/526] Quota quota_fsck.py, converting byte string to string + +Issue: The quota_fsck.py script throws an TypeError +due to the fact that the data is read as bytes and then +the string operations are applied on the. Now, in python3 +string is unicode and hence we get the type error. + +Code Changes: +Decoding the bytes value into utf-8 format. + +>Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40 +>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +>Fixes: #1401 +Upstream patch: https://review.gluster.org/c/glusterfs/+/24785 + +BUG: 1719171 +Change-Id: Ia1ff52a821d664a371c8166692ff506ae39f6e40 +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/224780 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Kshithij Iyer <kiyer@redhat.com> +Reviewed-by: Rinku Kothiya <rkothiya@redhat.com> +--- + extras/quota/quota_fsck.py | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py +index 174f2a2..ea8d638 100755 +--- a/extras/quota/quota_fsck.py ++++ b/extras/quota/quota_fsck.py +@@ -157,6 +157,7 @@ def get_quota_xattr_brick(dpath): + xattr_dict['parents'] = {} + + for xattr in pairs: ++ xattr = xattr.decode("utf-8") + xattr_key = xattr.split("=")[0] + if re.search("# file:", xattr_key): + # skip the file comment +-- +1.8.3.1 + diff --git a/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch b/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch new file mode 100644 index 0000000..133a24e --- /dev/null +++ b/SOURCES/0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch @@ -0,0 +1,200 @@ +From 4152c77defac24ace3b1b6b9cc81a4f614254e4f Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar <ssivakum@redhat.com> +Date: Sat, 18 Jul 2020 05:59:09 +0530 +Subject: [PATCH 527/532] Events: Socket creation after getaddrinfo and IPv4 + and IPv6 packet capture + +Issue: Currently, the socket creation is done +prior to getaddrinfo function being invoked. This +can cause mismatch in the protocol and address +families of the created socket and the result +of the getaddrinfo api. Also, the glustereventsd +UDP server by default only captures IPv4 packets +hence IPv6 packets are not even captured. + +Code Changes: +1. Modified the socket creation in such a way that +the parameters taken in are dependent upon the +result of the getaddrinfo function. +2. 
Created a subclass for adding address family +in glustereventsd.py for both AF_INET and AF_INET6. +3. Modified addresses in the eventsapiconf.py.in + +Reasoning behind the approach: +1. If we are using getaddrinfo function then +socket creation should happen only after we +check if we received back valid addresses. +Hence socket creation should come after the call +to getaddrinfo +2. The listening server which pushes the events +to the webhook has to listen for both IPv4 +and IPv6 messages as we would not be sure as to +what address family is picked in _gf_event. + +>Fixes: #1377 +>Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760 +>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Upstream patch: https://review.gluster.org/c/glusterfs/+/24722 + +BUG: 1814744 +Change-Id: I568dcd1a977c8832f0fef981e1f81cac7043c760 +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/225567 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +--- + events/src/eventsapiconf.py.in | 2 ++ + events/src/glustereventsd.py | 37 ++++++++++++++++++++++++++++++------- + libglusterfs/src/events.c | 27 +++++++++++++++++++-------- + 3 files changed, 51 insertions(+), 15 deletions(-) + +diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in +index 76b5954..700093b 100644 +--- a/events/src/eventsapiconf.py.in ++++ b/events/src/eventsapiconf.py.in +@@ -28,6 +28,8 @@ def get_glusterd_workdir(): + return glusterd_workdir + + SERVER_ADDRESS = "0.0.0.0" ++SERVER_ADDRESSv4 = "0.0.0.0" ++SERVER_ADDRESSv6 = "::1" + DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json" + CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json" + CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC +diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py +index c4c7b65..341a3b6 100644 +--- a/events/src/glustereventsd.py ++++ b/events/src/glustereventsd.py +@@ -13,6 +13,7 @@ + from __future__ import print_function + import sys + import signal ++import threading + try: + import socketserver + except ImportError: +@@ -23,10 +24,17 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter + from eventtypes import all_events + import handlers + import utils +-from eventsapiconf import SERVER_ADDRESS, PID_FILE ++from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE + from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES + from utils import logger, PidFile, PidFileLockFailed, boolify + ++# Subclass so that specifically IPv4 packets are captured ++class UDPServerv4(socketserver.ThreadingUDPServer): ++ address_family = socket.AF_INET ++ ++# Subclass so that specifically IPv6 packets are captured ++class UDPServerv6(socketserver.ThreadingUDPServer): ++ address_family = socket.AF_INET6 + + class GlusterEventsRequestHandler(socketserver.BaseRequestHandler): + +@@ -89,6 +97,10 @@ def signal_handler_sigusr2(sig, frame): + utils.restart_webhook_pool() + + ++def UDP_server_thread(sock): ++ sock.serve_forever() ++ ++ + def init_event_server(): + utils.setup_logger() + utils.load_all() +@@ -99,15 +111,26 @@ def init_event_server(): + sys.stderr.write("Unable to get Port details from Config\n") + sys.exit(1) + +- # Start the Eventing Server, UDP Server ++ # Creating the Eventing Server, UDP Server for IPv4 packets ++ try: ++ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port), ++ GlusterEventsRequestHandler) ++ except socket.error as e: ++ 
sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e)) ++ sys.exit(1) ++ # Creating the Eventing Server, UDP Server for IPv6 packets + try: +- server = socketserver.ThreadingUDPServer( +- (SERVER_ADDRESS, port), +- GlusterEventsRequestHandler) ++ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port), ++ GlusterEventsRequestHandler) + except socket.error as e: +- sys.stderr.write("Failed to start Eventsd: {0}\n".format(e)) ++ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e)) + sys.exit(1) +- server.serve_forever() ++ server_thread1 = threading.Thread(target=UDP_server_thread, ++ args=(serverv4,)) ++ server_thread2 = threading.Thread(target=UDP_server_thread, ++ args=(serverv6,)) ++ server_thread1.start() ++ server_thread2.start() + + + def get_args(): +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 6d1e383..4d720ca 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -40,6 +40,7 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + char *host = NULL; + struct addrinfo hints; + struct addrinfo *result = NULL; ++ struct addrinfo *iter_result_ptr = NULL; + xlator_t *this = THIS; + char *volfile_server_transport = NULL; + +@@ -51,13 +52,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + goto out; + } + +- /* Initialize UDP socket */ +- sock = socket(AF_INET, SOCK_DGRAM, 0); +- if (sock < 0) { +- ret = EVENT_ERROR_SOCKET; +- goto out; +- } +- + if (ctx) { + volfile_server_transport = ctx->cmd_args.volfile_server_transport; + } +@@ -66,7 +60,6 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + } + + /* host = NULL returns localhost */ +- host = NULL; + if (ctx && ctx->cmd_args.volfile_server && + (strcmp(volfile_server_transport, "unix"))) { + /* If it is client code then volfile_server is set +@@ -84,6 +77,24 @@ _gf_event(eventtypes_t event, const char *fmt, ...) + goto out; + } + ++ // iterate over the result and break when socket creation is success. ++ for (iter_result_ptr = result; iter_result_ptr != NULL; ++ iter_result_ptr = iter_result_ptr->ai_next) { ++ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype, ++ iter_result_ptr->ai_protocol); ++ if (sock != -1) { ++ break; ++ } ++ } ++ /* ++ * If none of the addrinfo structures lead to a successful socket ++ * creation, socket creation has failed. ++ */ ++ if (sock < 0) { ++ ret = EVENT_ERROR_SOCKET; ++ goto out; ++ } ++ + va_start(arguments, fmt); + ret = gf_vasprintf(&msg, fmt, arguments); + va_end(arguments); +-- +1.8.3.1 + diff --git a/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch b/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch new file mode 100644 index 0000000..d04068d --- /dev/null +++ b/SOURCES/0528-Extras-Removing-xattr_analysis-script.patch @@ -0,0 +1,134 @@ +From 3fc74ce6c282f0f43fdcfeda47b71a1b19945b6d Mon Sep 17 00:00:00 2001 +From: srijan-sivakumar <ssivakum@redhat.com> +Date: Wed, 3 Feb 2021 10:11:04 +0530 +Subject: [PATCH 528/532] Extras: Removing xattr_analysis script + +The xattr_analysis.py script is used rarely for +debugging and seeing that it has some dependencies, +removing it from the release. + +If need be, it would be directly shared with the cu. 
+ +Label: DOWNSTREAM ONLY +BUG: 1719171 + +Change-Id: I4bb0df3ebfa7e43e13858b4b6e3efbb02ea79d5f +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/226301 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/quota/Makefile.am | 4 +-- + extras/quota/xattr_analysis.py | 73 ------------------------------------------ + glusterfs.spec.in | 1 - + 3 files changed, 2 insertions(+), 76 deletions(-) + delete mode 100755 extras/quota/xattr_analysis.py + +diff --git a/extras/quota/Makefile.am b/extras/quota/Makefile.am +index cdb6be1..e4d9322 100644 +--- a/extras/quota/Makefile.am ++++ b/extras/quota/Makefile.am +@@ -2,7 +2,7 @@ scriptsdir = $(datadir)/glusterfs/scripts + scripts_SCRIPTS = log_accounting.sh + + if WITH_SERVER +-scripts_SCRIPTS += xattr_analysis.py quota_fsck.py ++scripts_SCRIPTS += quota_fsck.py + endif + +-EXTRA_DIST = log_accounting.sh xattr_analysis.py quota_fsck.py ++EXTRA_DIST = log_accounting.sh quota_fsck.py +diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py +deleted file mode 100755 +index 7bd7d96..0000000 +--- a/extras/quota/xattr_analysis.py ++++ /dev/null +@@ -1,73 +0,0 @@ +-#!/usr/bin/python3 +-# Below script has two purposes +-# 1. Display xattr of entire FS tree in a human readable form +-# 2. Display all the directory where contri and size mismatch. +-# (If there are any directory with contri and size mismatch that are not dirty +-# then that highlights a propagation issue) +-# The script takes only one input LOG _FILE generated from the command, +-# find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr +- +-from __future__ import print_function +-import re +-import subprocess +-import sys +-from hurry.filesize import size +- +-if len(sys.argv) < 2: +- sys.exit('Usage: %s log_gluster_xattr \n' +- 'to generate log_gluster_xattr use: \n' +- 'find <brick_path> | xargs getfattr -d -m. 
-e hex > log_gluster_xattr' +- % sys.argv[0]) +-LOG_FILE=sys.argv[1] +- +-def get_quota_xattr_brick(): +- out = subprocess.check_output (["/usr/bin/cat", LOG_FILE]) +- pairs = out.splitlines() +- +- xdict = {} +- mismatch_size = [('====contri_size===', '====size====')] +- for xattr in pairs: +- k = xattr.split("=")[0] +- if re.search("# file:", k): +- print(xdict) +- filename=k +- print("=====" + filename + "=======") +- xdict = {} +- elif k is "": +- pass +- else: +- print(xattr) +- v = xattr.split("=")[1] +- if re.search("contri", k): +- if len(v) == 34: +- # for files size is obtained in iatt, file count should be 1, dir count=0 +- xdict['contri_file_count'] = int(v[18:34], 16) +- xdict['contri_dir_count'] = 0 +- else: +- xdict['contri_size'] = size(int(v[2:18], 16)) +- xdict['contri_file_count'] = int(v[18:34], 16) +- xdict['contri_dir_count'] = int(v[34:], 16) +- elif re.search("size", k): +- xdict['size'] = size(int(v[2:18], 16)) +- xdict['file_count'] = int(v[18:34], 16) +- xdict['dir_count'] = int(v[34:], 16) +- elif re.search("dirty", k): +- if v == '0x3000': +- xdict['dirty'] = False +- elif v == '0x3100': +- xdict['dirty'] = True +- elif re.search("limit_objects", k): +- xdict['limit_objects'] = int(v[2:18], 16) +- elif re.search("limit_set", k): +- xdict['limit_set'] = size(int(v[2:18], 16)) +- +- if 'size' in xdict and 'contri_size' in xdict and xdict['size'] != xdict['contri_size']: +- mismatch_size.append((xdict['contri_size'], xdict['size'], filename)) +- +- for values in mismatch_size: +- print(values) +- +- +-if __name__ == '__main__': +- get_quota_xattr_brick() +- +diff --git a/glusterfs.spec.in b/glusterfs.spec.in +index 30d7162..2be7677 100644 +--- a/glusterfs.spec.in ++++ b/glusterfs.spec.in +@@ -1380,7 +1380,6 @@ exit 0 + %if ( 0%{!?_without_server:1} ) + %files server + %doc extras/clear_xattrs.sh +-%{_datadir}/glusterfs/scripts/xattr_analysis.py* + %{_datadir}/glusterfs/scripts/quota_fsck.py* + # sysconf + %config(noreplace) %{_sysconfdir}/glusterfs +-- +1.8.3.1 + diff --git a/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch b/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch new file mode 100644 index 0000000..671451d --- /dev/null +++ b/SOURCES/0529-geo-rep-prompt-should-work-for-ignore_deletes.patch @@ -0,0 +1,75 @@ +From 1c7e96e73273b7891ea6ef0d768c2bf7ff5de7b0 Mon Sep 17 00:00:00 2001 +From: Shwetha K Acharya <sacharya@redhat.com> +Date: Thu, 4 Feb 2021 16:29:39 +0530 +Subject: [PATCH 529/532] geo-rep: prompt should work for ignore_deletes + +The python cli is intelligent enough to parse both "-" and "_" alike: + +Example: +geo-replication config updated successfully +sync_job 4 +geo-replication config updated successfully +gluster volume geo-replication primary 127.0.0.1::secondary config | grep sync_jobs +sync_jobs:5 + +Thus the prompt which appears after ignore-deletes true should +work for both ignore-deletes and ignore_deletes. 
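
For illustration, a standalone C sketch of the alternative approach: normalize '-' and '_' before comparing option names, instead of listing both spellings in every strcmp() as the change below does. The helper opt_name_eq() is invented for this sketch and is not part of the gluster CLI:

    #include <stdio.h>
    #include <string.h>

    /* Compare option names, treating '-' and '_' as the same char. */
    static int opt_name_eq(const char *a, const char *b)
    {
        while (*a && *b) {
            char ca = (*a == '-') ? '_' : *a;
            char cb = (*b == '-') ? '_' : *b;
            if (ca != cb)
                return 0;
            a++;
            b++;
        }
        return *a == *b; /* both strings must end together */
    }

    int main(void)
    {
        printf("%d\n", opt_name_eq("ignore-deletes", "ignore_deletes")); /* 1 */
        printf("%d\n", opt_name_eq("ignore-deletes", "ignore-delete"));  /* 0 */
        return 0;
    }

Checking the two spellings explicitly, as the patch does, is the simpler choice when only one option is affected.
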
+
+Label: DOWNSTREAM ONLY
+
+BUG: 1224906
+Change-Id: I89f854200a604d07d3ac6c374fe6d445ce9f22ca
+Signed-off-by: Shwetha K Acharya <sacharya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/226599
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ cli/src/cli-cmd-parser.c       |  5 +++--
+ tests/00-geo-rep/bug-1708603.t | 12 ++++++++++--
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
+index 34f17c9..dda8979 100644
+--- a/cli/src/cli-cmd-parser.c
++++ b/cli/src/cli-cmd-parser.c
+@@ -3107,8 +3107,9 @@ cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+     if (!ret)
+         ret = dict_set_int32(dict, "type", type);
+     if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+-        if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") &&
+-            !strcmp((char *)words[wordcount - 1], "true")) {
++        if ((((!strcmp((char *)words[wordcount - 2], "ignore_deletes")) ||
++              (!strcmp((char *)words[wordcount - 2], "ignore-deletes")))) &&
++            ((!strcmp((char *)words[wordcount - 1], "true")))) {
+             question =
+                 "There exists ~15 seconds delay for the option to take"
+                 " effect from stime of the corresponding brick. Please"
+diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t
+index 26913f1..edafb48 100644
+--- a/tests/00-geo-rep/bug-1708603.t
++++ b/tests/00-geo-rep/bug-1708603.t
+@@ -44,11 +44,19 @@ TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+ #Create geo-rep session
+ TEST create_georep_session $master $slave
+
+-echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
+-EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes)
++echo n | $GEOREP_CLI $master $slave config ignore_deletes true >/dev/null 2>&1
++EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore_deletes)
++
++echo y | $GEOREP_CLI $master $slave config ignore_deletes true
++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore_deletes)
++
++$GEOREP_CLI $master $slave config ignore_deletes false
+ echo y | $GEOREP_CLI $master $slave config ignore-deletes true
+ EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+
++echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
++EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
++
+ #Stop Geo-rep
+ TEST $GEOREP_CLI $master $slave stop
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch b/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch
new file mode 100644
index 0000000..aec73b7
--- /dev/null
+++ b/SOURCES/0530-gfapi-avoid-crash-while-logging-message.patch
@@ -0,0 +1,41 @@
+From 5a7348a266587704dae4f1ddda16b7c95f547251 Mon Sep 17 00:00:00 2001
+From: Rinku Kothiya <rkothiya@redhat.com>
+Date: Sun, 7 Feb 2021 13:40:24 +0000
+Subject: [PATCH 530/532] gfapi: avoid crash while logging message.
+
+Breaking the parameter into two different parameters
+to avoid a crash.
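
The crash is presumably the classic failure mode of a logging call whose format string does not agree with the arguments actually passed to it. A minimal standalone C sketch of that failure mode, using a toy variadic logger rather than the real gf_smsg() API:

    #include <stdarg.h>
    #include <stdio.h>

    /* Toy printf-style logger; stands in for a real logging call. */
    static void toy_log(const char *fmt, ...)
    {
        va_list ap;
        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        fputc('\n', stderr);
        va_end(ap);
    }

    int main(void)
    {
        unsigned long long limit = 1ULL << 30; /* stands in for GF_UNIT_GB */

        /* Safe: format and arguments agree. */
        toy_log("size=%llu is not allowed", limit);

        /* Unsafe (do not do this): "%s" would dereference the integer as
         * a pointer, undefined behavior of the kind that crashes:
         * toy_log("size >= %s is not allowed", limit);
         */
        return 0;
    }
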
+
+Upstream:
+> Reviewed-on: https://github.com/gluster/glusterfs/pull/2139
+> fixes: #2138
+> Change-Id: Idd5f3631488c1d892748f83e6847fb6fd2d0802a
+> Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+
+BUG: 1691320
+
+Change-Id: Ifd6a96982ffd4e5334f8be2297de2ad826f3145b
+Signed-off-by: Rinku Kothiya <rkothiya@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/226851
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
+---
+ api/src/glfs-fops.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
+index 051541f..6dc3b66 100644
+--- a/api/src/glfs-fops.c
++++ b/api/src/glfs-fops.c
+@@ -1529,7 +1529,7 @@ glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+         ret = -1;
+         errno = EINVAL;
+         gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+-                "size >= %llu is not allowed", GF_UNIT_GB, NULL);
++                "Data size too large", "size=%llu", GF_UNIT_GB, NULL);
+         goto out;
+     }
+
+--
+1.8.3.1
+
diff --git a/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch b/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch
new file mode 100644
index 0000000..8c2ecbf
--- /dev/null
+++ b/SOURCES/0531-Glustereventsd-Default-port-change-2091.patch
@@ -0,0 +1,69 @@
+From 058a853a1438b2a62586c545f71150ade3de23b7 Mon Sep 17 00:00:00 2001
+From: schaffung <ssivakum@redhat.com>
+Date: Wed, 10 Feb 2021 13:43:48 +0530
+Subject: [PATCH 531/532] Glustereventsd Default port change (#2091)
+
+Issue: The default port of glustereventsd is currently 24009,
+which is preventing glustereventsd from binding to the UDP port
+due to selinux policies.
+
+Fix: Changing the default port to something in the ephemeral range.
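
As a quick sanity check, a standalone C snippet (illustrative only, not part of glustereventsd) that tries to bind a UDP socket to the new default port; failure with EADDRINUSE or EACCES would mean the port is not usable on that host:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        struct sockaddr_in addr;
        int sock = socket(AF_INET, SOCK_DGRAM, 0);

        if (sock < 0) {
            perror("socket");
            return 1;
        }
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(55555);
        if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0)
            perror("bind to 55555"); /* EADDRINUSE/EACCES: port unusable */
        else
            puts("port 55555 is bindable");
        close(sock);
        return 0;
    }
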
+ +>Fixes: #2080 +>Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33 +>Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Upstream Patch : https://github.com/gluster/glusterfs/pull/2091 + +BUG: 1814744 +Change-Id: Ibdc87f83f82f69660dca95d6d14b226e10d8bd33 +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/227249 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + events/src/eventsconfig.json | 2 +- + extras/firewalld/glusterfs.xml | 2 +- + libglusterfs/src/events.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/events/src/eventsconfig.json b/events/src/eventsconfig.json +index 89e5b9c..14d8f84 100644 +--- a/events/src/eventsconfig.json ++++ b/events/src/eventsconfig.json +@@ -1,5 +1,5 @@ + { + "log-level": "INFO", +- "port": 24009, ++ "port": 55555, + "disable-events-log": false + } +diff --git a/extras/firewalld/glusterfs.xml b/extras/firewalld/glusterfs.xml +index 7e17644..dc74b2e 100644 +--- a/extras/firewalld/glusterfs.xml ++++ b/extras/firewalld/glusterfs.xml +@@ -4,7 +4,7 @@ + <description>Default ports for gluster-distributed storage</description> + <port protocol="tcp" port="24007"/> <!--For glusterd --> + <port protocol="tcp" port="24008"/> <!--For glusterd RDMA port management --> +-<port protocol="tcp" port="24009"/> <!--For glustereventsd --> ++<port protocol="tcp" port="55555"/> <!--For glustereventsd --> + <port protocol="tcp" port="38465"/> <!--Gluster NFS service --> + <port protocol="tcp" port="38466"/> <!--Gluster NFS service --> + <port protocol="tcp" port="38467"/> <!--Gluster NFS service --> +diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c +index 4d720ca..3659606 100644 +--- a/libglusterfs/src/events.c ++++ b/libglusterfs/src/events.c +@@ -26,7 +26,7 @@ + #include "glusterfs/events.h" + + #define EVENT_HOST "127.0.0.1" +-#define EVENT_PORT 24009 ++#define EVENT_PORT 55555 + + int + _gf_event(eventtypes_t event, const char *fmt, ...) +-- +1.8.3.1 + diff --git a/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch b/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch new file mode 100644 index 0000000..97e5aa7 --- /dev/null +++ b/SOURCES/0532-glusterd-fix-for-starting-brick-on-new-port.patch @@ -0,0 +1,79 @@ +From 2dad17fdbaab2ab2cda6a05dec9dcd2d37ea32ff Mon Sep 17 00:00:00 2001 +From: Nikhil Ladha <nladha@redhat.com> +Date: Wed, 10 Feb 2021 15:07:32 +0530 +Subject: [PATCH 532/532] glusterd: fix for starting brick on new port + +The Errno set by the runner code was not correct when the bind() fails +to assign an already occupied port in the __socket_server_bind(). + +Fix: +Updated the code to return the correct errno from the +__socket_server_bind() if the bind() fails due to EADDRINUSE error. And, +use the returned errno from runner_run() to retry allocating a new port +to the brick process. 
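
The retry pattern this fix enables, where the caller tries another port only when the failure really was EADDRINUSE, can be sketched standalone as follows (illustrative C with an invented try_bind() helper, not glusterd code):

    #include <arpa/inet.h>
    #include <errno.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Returns a bound socket fd (>= 0) or -errno on failure. */
    static int try_bind(int port)
    {
        struct sockaddr_in addr;
        int sock = socket(AF_INET, SOCK_STREAM, 0);

        if (sock < 0)
            return -errno;
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        addr.sin_port = htons(port);
        if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
            int ret = -errno; /* propagate the real reason upward */
            close(sock);
            return ret;
        }
        return sock;
    }

    int main(void)
    {
        int port, sock = -1;

        for (port = 49152; port < 49252; port++) {
            sock = try_bind(port);
            if (sock >= 0)
                break;
            if (sock != -EADDRINUSE) /* retry only when the port is taken */
                return 1;
        }
        if (sock >= 0)
            printf("bound to port %d\n", port);
        return 0;
    }
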
+
+>Fixes: #1101
+
+>Change-Id: If124337f41344a04f050754e402490529ef4ecdc
+>Signed-off-by: nik-redhat nladha@redhat.com
+
+Upstream patch: https://github.com/gluster/glusterfs/pull/2090
+
+BUG: 1865796
+
+Change-Id: If124337f41344a04f050754e402490529ef4ecdc
+Signed-off-by: nik-redhat <nladha@redhat.com>
+Reviewed-on: https://code.engineering.redhat.com/gerrit/227261
+Tested-by: RHGS Build Bot <nigelb@redhat.com>
+Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com>
+---
+ rpc/rpc-transport/socket/src/socket.c      | 3 +++
+ xlators/mgmt/glusterd/src/glusterd-utils.c | 6 ++----
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
+index 1ee7320..96ed9f1 100644
+--- a/rpc/rpc-transport/socket/src/socket.c
++++ b/rpc/rpc-transport/socket/src/socket.c
+@@ -973,8 +973,11 @@ __socket_server_bind(rpc_transport_t *this)
+                this->myinfo.identifier, strerror(errno));
+         if (errno == EADDRINUSE) {
+             gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
++            ret = -EADDRINUSE;
++            goto out;
+         }
+     }
++
+     if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+         if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+                         &this->myinfo.sockaddr_len) != 0) {
+diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
+index cf32bd9..bc188a2 100644
+--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
+@@ -2151,6 +2151,7 @@ glusterd_volume_start_glusterfs(glusterd_volinfo_t *volinfo,
+         ret = -1;
+         goto out;
+     }
++
+     /* Build the exp_path, before starting the glusterfsd even in
+        valgrind mode. Otherwise all the glusterfsd processes start
+        writing the valgrind log to the same file.
+@@ -2289,13 +2290,10 @@ retry:
+
+     if (wait) {
+         synclock_unlock(&priv->big_lock);
+-        errno = 0;
+         ret = runner_run(&runner);
+-        if (errno != 0)
+-            ret = errno;
+         synclock_lock(&priv->big_lock);
+
+-        if (ret == EADDRINUSE) {
++        if (ret == -EADDRINUSE) {
+             /* retry after getting a new port */
+             gf_msg(this->name, GF_LOG_WARNING, -ret,
+                    GD_MSG_SRC_BRICK_PORT_UNAVAIL,
+--
+1.8.3.1
+
diff --git a/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch b/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch
new file mode 100644
index 0000000..158b4b7
--- /dev/null
+++ b/SOURCES/0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch
@@ -0,0 +1,250 @@
+From 854ab79dbef449c39adf66e3faebb4681359fce4 Mon Sep 17 00:00:00 2001
+From: mohit84 <moagrawa@redhat.com>
+Date: Thu, 18 Feb 2021 09:40:44 +0530
+Subject: [PATCH 533/538] glusterd: Rebalance cli is not showing correct status
+ after reboot (#2172)
+
+Rebalance cli is not showing the correct status after a reboot.
+
+The CLI does not show the correct status because the defrag object is
+not valid at the time the rpc connection is created to show the status.
+The defrag object is not valid because, at the time glusterd starts,
+glusterd_restart_rebalance can be called almost at the same time by two
+different synctasks, and glusterd gets a disconnect on the rpc object
+and cleans up the defrag object.
+
+Solution: To keep the defrag object valid, populate a reference count
+          before creating the defrag rpc object.
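
A minimal standalone sketch of the reference-counting pattern described in the solution (illustrative C with invented names; the real helpers are the glusterd_defrag_ref()/glusterd_defrag_unref() functions added in the diff below):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        pthread_mutex_t lock;
        int refcnt;
    } obj_t;

    static obj_t *obj_new(void)
    {
        obj_t *obj = calloc(1, sizeof(*obj));
        pthread_mutex_init(&obj->lock, NULL);
        obj->refcnt = 1; /* creator holds the first reference */
        return obj;
    }

    static void obj_ref(obj_t *obj)
    {
        pthread_mutex_lock(&obj->lock);
        obj->refcnt++;
        pthread_mutex_unlock(&obj->lock);
    }

    static void obj_unref(obj_t *obj)
    {
        int refcnt;

        pthread_mutex_lock(&obj->lock);
        refcnt = --obj->refcnt;
        pthread_mutex_unlock(&obj->lock);
        if (refcnt == 0) { /* free only after leaving the lock */
            pthread_mutex_destroy(&obj->lock);
            free(obj);
        }
    }

    int main(void)
    {
        obj_t *obj = obj_new();

        obj_ref(obj);   /* e.g. the rpc connection takes a reference */
        obj_unref(obj); /* rpc disconnect drops it */
        obj_unref(obj); /* owner drops the last reference: freed here */
        puts("done");
        return 0;
    }
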
+>Fixes: #1339 +>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +>Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba +Upstream Patch : https://github.com/gluster/glusterfs/pull/2172 + +BUG: 1832306 +Change-Id: Ia284015d79beaa3d703ebabb92f26870a5aaafba +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228249 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-rebalance.c | 35 ++++++++++----- + xlators/mgmt/glusterd/src/glusterd-syncop.c | 1 + + xlators/mgmt/glusterd/src/glusterd-utils.c | 59 +++++++++++++++++++++++++- + xlators/mgmt/glusterd/src/glusterd-utils.h | 5 +++ + xlators/mgmt/glusterd/src/glusterd.h | 1 + + 5 files changed, 90 insertions(+), 11 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +index b419a89..fcd5318 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c ++++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c +@@ -86,6 +86,7 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int pid = -1; ++ int refcnt = 0; + + this = THIS; + if (!this) +@@ -125,11 +126,12 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + } + + case RPC_CLNT_DISCONNECT: { +- if (!defrag->connected) +- return 0; +- + LOCK(&defrag->lock); + { ++ if (!defrag->connected) { ++ UNLOCK(&defrag->lock); ++ return 0; ++ } + defrag->connected = 0; + } + UNLOCK(&defrag->lock); +@@ -146,11 +148,11 @@ __glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata, + glusterd_defrag_rpc_put(defrag); + if (defrag->cbk_fn) + defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status); +- +- GF_FREE(defrag); ++ refcnt = glusterd_defrag_unref(defrag); + gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED, +- "Rebalance process for volume %s has disconnected.", +- volinfo->volname); ++ "Rebalance process for volume %s has disconnected" ++ " and defrag refcnt is %d.", ++ volinfo->volname, refcnt); + break; + } + case RPC_CLNT_DESTROY: +@@ -309,7 +311,11 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr, + gf_msg_debug("glusterd", 0, "rebalance command failed"); + goto out; + } +- ++ /* Take reference before sleep to save defrag object cleanup while ++ glusterd_restart_rebalance call for other bricks by syncktask ++ at the time of restart a glusterd. 
++ */ ++ glusterd_defrag_ref(defrag); + sleep(5); + + ret = glusterd_rebalance_rpc_create(volinfo); +@@ -372,6 +378,7 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); ++ struct rpc_clnt *rpc = NULL; + + // rebalance process is not started + if (!defrag) +@@ -396,13 +403,21 @@ glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo) + } + + glusterd_volinfo_ref(volinfo); +- ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify, +- volinfo, _gf_true); ++ ret = glusterd_rpc_create(&rpc, options, glusterd_defrag_notify, volinfo, ++ _gf_false); + if (ret) { + gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL, + "Glusterd RPC creation failed"); + goto out; + } ++ LOCK(&defrag->lock); ++ { ++ if (!defrag->rpc) ++ defrag->rpc = rpc; ++ else ++ rpc_clnt_unref(rpc); ++ } ++ UNLOCK(&defrag->lock); + ret = 0; + out: + if (options) +diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c +index df78fef..05c9e11 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c ++++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c +@@ -1732,6 +1732,7 @@ gd_brick_op_phase(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE && pending_node->node) { + volinfo = pending_node->node; ++ glusterd_defrag_ref(volinfo->rebal.defrag); + ret = glusterd_rebalance_rpc_create(volinfo); + if (ret) { + ret = 0; +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index bc188a2..9fb8eab 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -93,6 +93,44 @@ + #define NLMV4_VERSION 4 + #define NLMV1_VERSION 1 + ++int ++glusterd_defrag_ref(glusterd_defrag_info_t *defrag) ++{ ++ int refcnt = 0; ++ ++ if (!defrag) ++ goto out; ++ ++ LOCK(&defrag->lock); ++ { ++ refcnt = ++defrag->refcnt; ++ } ++ UNLOCK(&defrag->lock); ++ ++out: ++ return refcnt; ++} ++ ++int ++glusterd_defrag_unref(glusterd_defrag_info_t *defrag) ++{ ++ int refcnt = -1; ++ ++ if (!defrag) ++ goto out; ++ ++ LOCK(&defrag->lock); ++ { ++ refcnt = --defrag->refcnt; ++ if (refcnt <= 0) ++ GF_FREE(defrag); ++ } ++ UNLOCK(&defrag->lock); ++ ++out: ++ return refcnt; ++} ++ + gf_boolean_t + is_brick_mx_enabled(void) + { +@@ -9370,6 +9408,7 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + char pidfile[PATH_MAX] = ""; + int ret = -1; + pid_t pid = 0; ++ int refcnt = 0; + + this = THIS; + GF_ASSERT(this); +@@ -9410,7 +9449,25 @@ glusterd_volume_defrag_restart(glusterd_volinfo_t *volinfo, char *op_errstr, + volinfo->volname); + goto out; + } +- ret = glusterd_rebalance_rpc_create(volinfo); ++ refcnt = glusterd_defrag_ref(volinfo->rebal.defrag); ++ /* If refcnt value is 1 it means either defrag object is ++ poulated by glusterd_rebalance_defrag_init or previous ++ rpc creation was failed.If it is not 1 it means it(defrag) ++ was populated at the time of start a rebalance daemon. ++ We need to create a rpc object only while a previous ++ rpc connection was not established successfully at the ++ time of restart a rebalance daemon by ++ glusterd_handle_defrag_start otherwise rebalance cli ++ does not show correct status after just reboot a node and try ++ to print the rebalance status because defrag object has been ++ destroyed during handling of rpc disconnect. 
++ */ ++ if (refcnt == 1) { ++ ret = glusterd_rebalance_rpc_create(volinfo); ++ } else { ++ ret = 0; ++ glusterd_defrag_unref(volinfo->rebal.defrag); ++ } + break; + } + case GF_DEFRAG_STATUS_NOT_STARTED: +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h +index 02d85d2..4541471 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.h ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.h +@@ -886,4 +886,9 @@ int32_t + glusterd_check_brick_order(dict_t *dict, char *err_str, int32_t type, + int32_t sub_count); + ++int ++glusterd_defrag_ref(glusterd_defrag_info_t *defrag); ++ ++int ++glusterd_defrag_unref(glusterd_defrag_info_t *defrag); + #endif +diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h +index efe4d0e..9de3f28 100644 +--- a/xlators/mgmt/glusterd/src/glusterd.h ++++ b/xlators/mgmt/glusterd/src/glusterd.h +@@ -321,6 +321,7 @@ struct glusterd_defrag_info_ { + uint64_t total_data; + uint64_t num_files_lookedup; + uint64_t total_failures; ++ int refcnt; + gf_lock_t lock; + int cmd; + pthread_t th; +-- +1.8.3.1 + diff --git a/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch b/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch new file mode 100644 index 0000000..2dc72c1 --- /dev/null +++ b/SOURCES/0534-glusterd-Resolve-use-after-free-bug-2181.patch @@ -0,0 +1,47 @@ +From b3647eb5415b2e3d9e1a11ad6c4689e520f17b39 Mon Sep 17 00:00:00 2001 +From: mohit84 <moagrawa@redhat.com> +Date: Mon, 22 Feb 2021 10:09:34 +0530 +Subject: [PATCH 534/538] glusterd: Resolve use after free bug (#2181) + +In the commit 61ae58e67567ea4de8f8efc6b70a9b1f8e0f1bea +introduced a coverity bug use object after cleanup +the object. + +Cleanup memory after comeout from a critical section +>Fixes: #2180 + +>Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041 +>Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Upstream Patch : https://github.com/gluster/glusterfs/pull/2181 + +BUG: 1832306 +Change-Id: Iee2050c4883a0dd44b8523bb822b664462ab6041 +Signed-off-by: srijan-sivakumar <ssivakum@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228578 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-utils.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c +index 9fb8eab..6d40be5 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-utils.c ++++ b/xlators/mgmt/glusterd/src/glusterd-utils.c +@@ -122,11 +122,10 @@ glusterd_defrag_unref(glusterd_defrag_info_t *defrag) + LOCK(&defrag->lock); + { + refcnt = --defrag->refcnt; +- if (refcnt <= 0) +- GF_FREE(defrag); + } + UNLOCK(&defrag->lock); +- ++ if (refcnt <= 0) ++ GF_FREE(defrag); + out: + return refcnt; + } +-- +1.8.3.1 + diff --git a/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch b/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch new file mode 100644 index 0000000..e1622de --- /dev/null +++ b/SOURCES/0535-multiple-files-use-dict_allocate_and_serialize-where.patch @@ -0,0 +1,270 @@ +From 775d500cd136bd8c940faaeffde1217c25a87e3d Mon Sep 17 00:00:00 2001 +From: Yaniv Kaul <ykaul@redhat.com> +Date: Sun, 2 Jun 2019 21:14:18 +0300 +Subject: [PATCH 535/538] (multiple files) use dict_allocate_and_serialize() + where applicable. + +This function does length, allocation and serialization for you. 
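
To illustrate why the combined call is safer, here is a standalone toy version of such a helper (an invented serializer for an array of strings, not the glusterfs dict API): one function computes the length, allocates, and serializes, so callers cannot get the three steps out of sync:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Serialize `count` NUL-terminated strings into one malloc'd buffer. */
    static int serialize_strings(char **strs, int count,
                                 char **buf, unsigned int *len)
    {
        unsigned int total = 0;
        char *p;
        int i;

        for (i = 0; i < count; i++)      /* 1. compute the length */
            total += strlen(strs[i]) + 1;

        p = malloc(total);               /* 2. allocate */
        if (p == NULL)
            return -1;

        *buf = p;
        for (i = 0; i < count; i++) {    /* 3. serialize */
            size_t n = strlen(strs[i]) + 1;
            memcpy(p, strs[i], n);
            p += n;
        }
        *len = total;
        return 0;
    }

    int main(void)
    {
        char *strs[] = {"lockinfo", "xdata"};
        char *buf = NULL;
        unsigned int len = 0;

        if (serialize_strings(strs, 2, &buf, &len) == 0) {
            printf("serialized %u bytes\n", len);
            free(buf);
        }
        return 0;
    }
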
+ +Upstream patch: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/22800 +> Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e +> updates: bz#1193929 +> Signed-off-by: Yaniv Kaul <ykaul@redhat.com> + +Change-Id: I142a259952a2fe83dd719442afaefe4a43a8e55e +BUG: 1911292 +Signed-off-by: Yaniv Kaul <ykaul@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228611 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/afr/src/afr-inode-read.c | 34 +++++--------------------- + xlators/cluster/ec/src/ec-combine.c | 16 +++--------- + xlators/features/locks/src/posix.c | 23 +++-------------- + xlators/protocol/client/src/client-handshake.c | 14 +++-------- + xlators/protocol/server/src/server-handshake.c | 24 +++++++----------- + xlators/protocol/server/src/server-helpers.c | 27 +++----------------- + 6 files changed, 28 insertions(+), 110 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index 523a5b4..cf305af 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -948,24 +948,13 @@ unlock: + goto unwind; + } + +- len = dict_serialized_length(local->dict); +- if (len <= 0) { +- goto unwind; +- } +- +- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (!lockinfo_buf) { ++ op_ret = dict_allocate_and_serialize( ++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); ++ if (op_ret != 0) { + local->op_ret = -1; +- local->op_errno = ENOMEM; + goto unwind; + } + +- op_ret = dict_serialize(local->dict, lockinfo_buf); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- } +- + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +@@ -1064,24 +1053,13 @@ unlock: + goto unwind; + } + +- len = dict_serialized_length(local->dict); +- if (len <= 0) { +- goto unwind; +- } +- +- lockinfo_buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (!lockinfo_buf) { ++ op_ret = dict_allocate_and_serialize( ++ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); ++ if (op_ret != 0) { + local->op_ret = -1; +- local->op_errno = ENOMEM; + goto unwind; + } + +- op_ret = dict_serialize(local->dict, lockinfo_buf); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- } +- + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c +index 99e5534..9d712b3 100644 +--- a/xlators/cluster/ec/src/ec-combine.c ++++ b/xlators/cluster/ec/src/ec-combine.c +@@ -486,22 +486,12 @@ ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key) + + tmp = NULL; + +- len = dict_serialized_length(lockinfo); +- if (len < 0) { +- err = len; +- +- goto out; +- } +- ptr = GF_MALLOC(len, gf_common_mt_char); +- if (ptr == NULL) { +- err = -ENOMEM; +- +- goto out; +- } +- err = dict_serialize(lockinfo, ptr); ++ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr, ++ (unsigned int *)&len); + if (err != 0) { + goto out; + } ++ + dict = (which == EC_COMBINE_XDATA) ? 
cbk->xdata : cbk->dict; + err = dict_set_dynptr(dict, key, ptr, len); + if (err != 0) { +diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c +index 5ae0125..cdd1ff7 100644 +--- a/xlators/features/locks/src/posix.c ++++ b/xlators/features/locks/src/posix.c +@@ -1547,8 +1547,9 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + goto out; + } + +- len = dict_serialized_length(tmp); +- if (len < 0) { ++ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf, ++ (unsigned int *)&len); ++ if (op_ret != 0) { + *op_errno = -op_ret; + op_ret = -1; + gf_log(this->name, GF_LOG_WARNING, +@@ -1558,24 +1559,6 @@ pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict, + goto out; + } + +- buf = GF_CALLOC(1, len, gf_common_mt_char); +- if (buf == NULL) { +- op_ret = -1; +- *op_errno = ENOMEM; +- goto out; +- } +- +- op_ret = dict_serialize(tmp, buf); +- if (op_ret < 0) { +- *op_errno = -op_ret; +- op_ret = -1; +- gf_log(this->name, GF_LOG_WARNING, +- "dict_serialize failed (%s) while handling lockinfo " +- "for fd (ptr: %p inode-gfid:%s)", +- strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid)); +- goto out; +- } +- + op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len); + if (op_ret < 0) { + *op_errno = -op_ret; +diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c +index 0002361..6b20d92 100644 +--- a/xlators/protocol/client/src/client-handshake.c ++++ b/xlators/protocol/client/src/client-handshake.c +@@ -1286,18 +1286,10 @@ client_setvolume(xlator_t *this, struct rpc_clnt *rpc) + "Failed to set client opversion in handshake message"); + } + +- ret = dict_serialized_length(options); +- if (ret < 0) { +- gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_ERROR, +- "failed to get serialized length of dict"); ++ ret = dict_allocate_and_serialize(options, (char **)&req.dict.dict_val, ++ &req.dict.dict_len); ++ if (ret != 0) { + ret = -1; +- goto fail; +- } +- req.dict.dict_len = ret; +- req.dict.dict_val = GF_CALLOC(1, req.dict.dict_len, +- gf_client_mt_clnt_req_buf_t); +- ret = dict_serialize(options, req.dict.dict_val); +- if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, PC_MSG_DICT_SERIALIZE_FAIL, + "failed to serialize " + "dictionary"); +diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c +index eeca73c..54dc030 100644 +--- a/xlators/protocol/server/src/server-handshake.c ++++ b/xlators/protocol/server/src/server-handshake.c +@@ -676,22 +676,16 @@ fail: + GF_ASSERT(rsp); + + rsp->op_ret = 0; +- ret = dict_serialized_length(reply); +- if (ret > 0) { +- rsp->dict.dict_len = ret; +- rsp->dict.dict_val = GF_CALLOC(1, rsp->dict.dict_len, +- gf_server_mt_rsp_buf_t); +- if (rsp->dict.dict_val) { +- ret = dict_serialize(reply, rsp->dict.dict_val); +- if (ret < 0) { +- gf_msg_debug("server-handshake", 0, +- "failed " +- "to serialize reply dict"); +- op_ret = -1; +- op_errno = -ret; +- } +- } ++ ++ ret = dict_allocate_and_serialize(reply, (char **)&rsp->dict.dict_val, ++ &rsp->dict.dict_len); ++ if (ret != 0) { ++ ret = -1; ++ gf_msg_debug("server-handshake", 0, "failed to serialize reply dict"); ++ op_ret = -1; ++ op_errno = -ret; + } ++ + rsp->op_ret = op_ret; + rsp->op_errno = gf_errno_to_error(op_errno); + +diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c +index e74a24d..33959b5 100644 +--- a/xlators/protocol/server/src/server-helpers.c ++++ 
b/xlators/protocol/server/src/server-helpers.c
+@@ -902,7 +902,6 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
+     gfs3_dirplist *trav = NULL;
+     gfs3_dirplist *prev = NULL;
+     int ret = -1;
+-    int temp = 0;
+
+     GF_VALIDATE_OR_GOTO("server", entries, out);
+     GF_VALIDATE_OR_GOTO("server", rsp, out);
+@@ -923,28 +922,10 @@ serialize_rsp_direntp(gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
+
+         /* if 'dict' is present, pack it */
+         if (entry->dict) {
+-            temp = dict_serialized_length(entry->dict);
+-
+-            if (temp < 0) {
+-                gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, PS_MSG_INVALID_ENTRY,
+-                       "failed to get "
+-                       "serialized length of reply dict");
+-                errno = EINVAL;
+-                trav->dict.dict_len = 0;
+-                goto out;
+-            }
+-            trav->dict.dict_len = temp;
+-
+-            trav->dict.dict_val = GF_CALLOC(1, trav->dict.dict_len,
+-                                            gf_server_mt_rsp_buf_t);
+-            if (!trav->dict.dict_val) {
+-                errno = ENOMEM;
+-                trav->dict.dict_len = 0;
+-                goto out;
+-            }
+-
+-            ret = dict_serialize(entry->dict, trav->dict.dict_val);
+-            if (ret < 0) {
++            ret = dict_allocate_and_serialize(entry->dict,
++                                              (char **)&trav->dict.dict_val,
++                                              &trav->dict.dict_len);
++            if (ret != 0) {
+                 gf_msg(THIS->name, GF_LOG_ERROR, 0, PS_MSG_DICT_SERIALIZE_FAIL,
+                        "failed to serialize reply dict");
+                 errno = -ret;
+--
+1.8.3.1
+
diff --git a/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch b/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch
new file mode 100644
index 0000000..94e0b64
--- /dev/null
+++ b/SOURCES/0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch
@@ -0,0 +1,102 @@
+From 32281b4b5cf79d0ef6f0c65775bb81093e1ba479 Mon Sep 17 00:00:00 2001
+From: Mohit Agrawal <moagrawa@redhat.com>
+Date: Wed, 24 Feb 2021 18:44:12 +0530
+Subject: [PATCH 536/538] dht: Ongoing IO is failed during volume shrink
+ operation (#2188)
+
+In commit (c878174) we introduced a check to avoid a stale layout
+issue. To avoid the stale layout issue, dht sets a key along with the
+layout while winding a create fop, and posix validates the parent
+layout based on the key value. If the layout does not match, it throws
+an error. In case of a volume shrink, the layout is changed by the
+rebalance daemon, and if the layout no longer matches, dht is not able
+to wind a create fop successfully.
+
+Solution: Populate the key only when dht winds the fop for the first
+          time. If that attempt returns an error, dht takes a lock and
+          then reattempts to wind the fop, this time without the key.
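
The resulting control flow can be sketched standalone as follows (illustrative C only; wind_create() is an invented stand-in for winding the create fop, not a dht function):

    #include <stdbool.h>
    #include <stdio.h>

    /* Pretend the on-disk layout changed, so key validation fails. */
    static int wind_create(bool with_layout_key)
    {
        if (with_layout_key)
            return -1; /* posix rejects the stale parent layout */
        return 0;
    }

    int main(void)
    {
        int lk_count = 0; /* no layout lock held yet */

        /* First attempt: lockless, so attach the validation key. */
        if (wind_create(lk_count == 0) == 0) {
            puts("create succeeded on first attempt");
            return 0;
        }

        /* Retry: take the layout lock, refresh the layout, and wind
         * the fop again WITHOUT the key. */
        lk_count = 1;
        if (wind_create(lk_count == 0) == 0)
            puts("create succeeded under lock, without the key");
        return 0;
    }
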
+ +> Fixes: #2187 +> Change-Id: Ie018386e7823a11eea415496bb226ca032453a55 +> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +> (Cherry pick from commit da6ce622b722f7d12619c5860293faf03f7cd00c +> Reviewed on upstream link https://github.com/gluster/glusterfs/pull/2188 + +Bug: 1924044 +Change-Id: I7670dbe2d562b83db0af3753f994653ffdd49591 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228941 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/dht/src/dht-common.c | 41 ++++++++++++++++++++++++++---------- + 1 file changed, 30 insertions(+), 11 deletions(-) + +diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c +index fe1d0ee..7425c1a 100644 +--- a/xlators/cluster/dht/src/dht-common.c ++++ b/xlators/cluster/dht/src/dht-common.c +@@ -8526,15 +8526,32 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + { + dht_local_t *local = NULL; + xlator_t *avail_subvol = NULL; ++ int lk_count = 0; + + local = frame->local; + + if (!dht_is_subvol_filled(this, subvol)) { +- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, +- subvol->name); +- +- dht_set_parent_layout_in_dict(loc, this, local); +- ++ lk_count = local->lock[0].layout.parent_layout.lk_count; ++ gf_msg_debug(this->name, 0, "creating %s on %s with lock_count %d", ++ loc->path, subvol->name, lk_count); ++ /*The function dht_set_parent_layout_in_dict sets the layout ++ in dictionary and posix_create validates a layout before ++ creating a file.In case if parent layout does not match ++ with disk layout posix xlator throw an error but in case ++ if volume is shrunk layout has been changed by rebalance daemon ++ so we need to call this function only while a function is calling ++ without taking any lock otherwise we would not able to populate a ++ layout on disk in case if layout has changed. 
++ */ ++ if (!lk_count) { ++ dht_set_parent_layout_in_dict(loc, this, local); ++ } else { ++ /* Delete a key to avoid layout validate if it was set by ++ previous STACK_WIND attempt when a lock was not taken ++ by dht_create ++ */ ++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY); ++ } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +@@ -8554,12 +8571,14 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this, + + goto out; + } +- +- gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, +- subvol->name); +- +- dht_set_parent_layout_in_dict(loc, this, local); +- ++ lk_count = local->lock[0].layout.parent_layout.lk_count; ++ gf_msg_debug(this->name, 0, "creating %s on %s with lk_count %d", ++ loc->path, subvol->name, lk_count); ++ if (!lk_count) { ++ dht_set_parent_layout_in_dict(loc, this, local); ++ } else { ++ (void)dict_del_sizen(local->params, GF_PREOP_PARENT_KEY); ++ } + STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol, + subvol->fops->create, loc, flags, mode, umask, fd, + params); +-- +1.8.3.1 + diff --git a/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch b/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch new file mode 100644 index 0000000..dcf0940 --- /dev/null +++ b/SOURCES/0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch @@ -0,0 +1,387 @@ +From 7b7ec67680415c22773ebb2a5daacf298b6b1e06 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Sat, 13 Feb 2021 18:37:32 +0100 +Subject: [PATCH 537/538] cluster/afr: Fix race in lockinfo (f)getxattr + +A shared dictionary was updated outside the lock after having updated +the number of remaining answers. This means that one thread may be +processing the last answer and unwinding the request before another +thread completes updating the dict. + + Thread 1 Thread 2 + + LOCK() + call_cnt-- (=1) + UNLOCK() + LOCK() + call_cnt-- (=0) + UNLOCK() + update_dict(dict) + if (call_cnt == 0) { + STACK_UNWIND(dict); + } + update_dict(dict) + if (call_cnt == 0) { + STACK_UNWIND(dict); + } + +The updates from thread 1 are lost. + +This patch also reduces the work done inside the locked region and +reduces code duplication. 
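
The corrected ordering (merge into the shared result first, decrement the pending counter last, and let only the thread that reaches zero unwind) can be demonstrated with a standalone C11 sketch; it uses <stdatomic.h> in place of the uatomic_sub_return() call the patch uses:

    /* build: cc -std=c11 -pthread sketch.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NCALLS 4

    static atomic_int call_count = NCALLS;
    static atomic_int merged;

    static void *answer(void *arg)
    {
        (void)arg;

        atomic_fetch_add(&merged, 1); /* 1. merge this answer's data */

        /* 2. decrement last; exactly one thread sees the count hit 0 */
        if (atomic_fetch_sub(&call_count, 1) == 1)
            printf("unwind with %d answers merged\n",
                   atomic_load(&merged)); /* always prints NCALLS */
        return NULL;
    }

    int main(void)
    {
        pthread_t threads[NCALLS];
        int i;

        for (i = 0; i < NCALLS; i++)
            pthread_create(&threads[i], NULL, answer, NULL);
        for (i = 0; i < NCALLS; i++)
            pthread_join(threads[i], NULL);
        return 0;
    }

Decrementing before the merge, as in the buggy interleaving shown above, would let the unwinding thread run while another thread is still writing into the shared dict.
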
+ +Upstream-patch: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2162 +> Fixes: #2161 +> Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1911292 +Change-Id: Idc0d34ab19ea6031de0641f7b05c624d90fac8fa +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/228924 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/afr/src/afr-inode-read.c | 254 ++++++++++++++----------------- + 1 file changed, 112 insertions(+), 142 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index cf305af..98e195a 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -15,6 +15,8 @@ + #include <stdlib.h> + #include <signal.h> + ++#include <urcu/uatomic.h> ++ + #include <glusterfs/glusterfs.h> + #include "afr.h" + #include <glusterfs/dict.h> +@@ -868,188 +870,121 @@ afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + return 0; + } + +-int32_t +-afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) ++static int32_t ++afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata) + { +- int call_cnt = 0, len = 0; +- char *lockinfo_buf = NULL; +- dict_t *lockinfo = NULL, *newdict = NULL; +- afr_local_t *local = NULL; ++ afr_local_t *local; ++ dict_t *local_dict; ++ dict_t *local_xdata; ++ int32_t ret; + +- LOCK(&frame->lock); +- { +- local = frame->local; ++ local = frame->local; ++ local_dict = NULL; ++ local_xdata = NULL; + +- call_cnt = --local->call_count; ++ ret = -ENOMEM; + +- if ((op_ret < 0) || (!dict && !xdata)) { +- goto unlock; +- } +- +- if (xdata) { +- if (!local->xdata_rsp) { +- local->xdata_rsp = dict_new(); +- if (!local->xdata_rsp) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } +- } ++ if ((dict != NULL) && (local->dict == NULL)) { ++ local_dict = dict_new(); ++ if (local_dict == NULL) { ++ goto done; + } ++ } + +- if (!dict) { +- goto unlock; ++ if ((xdata != NULL) && (local->xdata_rsp == NULL)) { ++ local_xdata = dict_new(); ++ if (local_xdata == NULL) { ++ goto done; + } ++ } + +- op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, +- (void **)&lockinfo_buf, &len); ++ if ((local_dict != NULL) || (local_xdata != NULL)) { ++ /* TODO: Maybe it would be better to preallocate both dicts before ++ * sending the requests. This way we don't need to use a LOCK() ++ * here. 
*/ ++ LOCK(&frame->lock); + +- if (!lockinfo_buf) { +- goto unlock; ++ if ((local_dict != NULL) && (local->dict == NULL)) { ++ local->dict = local_dict; ++ local_dict = NULL; + } + +- if (!local->dict) { +- local->dict = dict_new(); +- if (!local->dict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } ++ if ((local_xdata != NULL) && (local->xdata_rsp == NULL)) { ++ local->xdata_rsp = local_xdata; ++ local_xdata = NULL; + } +- } +-unlock: +- UNLOCK(&frame->lock); + +- if (lockinfo_buf != NULL) { +- lockinfo = dict_new(); +- if (lockinfo == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- } else { +- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); +- +- if (lockinfo && local->dict) { +- dict_copy(lockinfo, local->dict); +- } +- } +- } +- +- if (xdata && local->xdata_rsp) { +- dict_copy(xdata, local->xdata_rsp); ++ UNLOCK(&frame->lock); + } + +- if (!call_cnt) { +- newdict = dict_new(); +- if (!newdict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unwind; ++ if (dict != NULL) { ++ if (dict_copy(dict, local->dict) < 0) { ++ goto done; + } ++ } + +- op_ret = dict_allocate_and_serialize( +- local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); +- if (op_ret != 0) { +- local->op_ret = -1; +- goto unwind; ++ if (xdata != NULL) { ++ if (dict_copy(xdata, local->xdata_rsp) < 0) { ++ goto done; + } ++ } + +- op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, +- (void *)lockinfo_buf, len); +- if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; +- goto unwind; +- } ++ ret = 0; + +- unwind: +- AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, +- local->xdata_rsp); ++done: ++ if (local_dict != NULL) { ++ dict_unref(local_dict); + } + +- dict_unref(lockinfo); ++ if (local_xdata != NULL) { ++ dict_unref(local_xdata); ++ } + +- return 0; ++ return ret; + } + +-int32_t +-afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +- int32_t op_ret, int32_t op_errno, dict_t *dict, +- dict_t *xdata) ++static void ++afr_getxattr_lockinfo_cbk_common(call_frame_t *frame, int32_t op_ret, ++ int32_t op_errno, dict_t *dict, dict_t *xdata, ++ bool is_fgetxattr) + { +- int call_cnt = 0, len = 0; ++ int len = 0; + char *lockinfo_buf = NULL; + dict_t *lockinfo = NULL, *newdict = NULL; + afr_local_t *local = NULL; + +- LOCK(&frame->lock); +- { +- local = frame->local; +- +- call_cnt = --local->call_count; +- +- if ((op_ret < 0) || (!dict && !xdata)) { +- goto unlock; +- } +- +- if (xdata) { +- if (!local->xdata_rsp) { +- local->xdata_rsp = dict_new(); +- if (!local->xdata_rsp) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; +- } +- } +- } +- +- if (!dict) { +- goto unlock; +- } ++ local = frame->local; + ++ if ((op_ret >= 0) && (dict != NULL)) { + op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY, + (void **)&lockinfo_buf, &len); +- +- if (!lockinfo_buf) { +- goto unlock; +- } +- +- if (!local->dict) { +- local->dict = dict_new(); +- if (!local->dict) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- goto unlock; ++ if (lockinfo_buf != NULL) { ++ lockinfo = dict_new(); ++ if (lockinfo == NULL) { ++ op_ret = -1; ++ } else { ++ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); + } + } + } +-unlock: +- UNLOCK(&frame->lock); + +- if (lockinfo_buf != NULL) { +- lockinfo = dict_new(); +- if (lockinfo == NULL) { +- local->op_ret = -1; +- local->op_errno = ENOMEM; +- } else { +- op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo); +- +- if (lockinfo && 
local->dict) { +- dict_copy(lockinfo, local->dict); +- } ++ if ((op_ret >= 0) && ((lockinfo != NULL) || (xdata != NULL))) { ++ op_ret = afr_update_local_dicts(frame, lockinfo, xdata); ++ if (lockinfo != NULL) { ++ dict_unref(lockinfo); + } + } + +- if (xdata && local->xdata_rsp) { +- dict_copy(xdata, local->xdata_rsp); ++ if (op_ret < 0) { ++ local->op_ret = -1; ++ local->op_errno = ENOMEM; + } + +- if (!call_cnt) { ++ if (uatomic_sub_return(&local->call_count, 1) == 0) { + newdict = dict_new(); + if (!newdict) { + local->op_ret = -1; +- local->op_errno = ENOMEM; ++ local->op_errno = op_errno = ENOMEM; + goto unwind; + } + +@@ -1057,23 +992,58 @@ unlock: + local->dict, (char **)&lockinfo_buf, (unsigned int *)&len); + if (op_ret != 0) { + local->op_ret = -1; ++ local->op_errno = op_errno = ENOMEM; + goto unwind; + } + + op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY, + (void *)lockinfo_buf, len); + if (op_ret < 0) { +- local->op_ret = -1; +- local->op_errno = -op_ret; ++ GF_FREE(lockinfo_buf); ++ local->op_ret = op_ret = -1; ++ local->op_errno = op_errno = -op_ret; + goto unwind; + } + + unwind: +- AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, +- local->xdata_rsp); ++ /* TODO: These unwinds use op_ret and op_errno instead of local->op_ret ++ * and local->op_errno. This doesn't seem right because any ++ * failure during processing of each answer could be silently ++ * ignored. This is kept this was the old behavior and because ++ * local->op_ret is initialized as -1 and local->op_errno is ++ * initialized as EUCLEAN, which makes these values useless. */ ++ if (is_fgetxattr) { ++ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict, ++ local->xdata_rsp); ++ } else { ++ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict, ++ local->xdata_rsp); ++ } ++ ++ if (newdict != NULL) { ++ dict_unref(newdict); ++ } + } ++} ++ ++static int32_t ++afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata, ++ false); + +- dict_unref(lockinfo); ++ return 0; ++} ++ ++static int32_t ++afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this, ++ int32_t op_ret, int32_t op_errno, dict_t *dict, ++ dict_t *xdata) ++{ ++ afr_getxattr_lockinfo_cbk_common(frame, op_ret, op_errno, dict, xdata, ++ true); + + return 0; + } +-- +1.8.3.1 + diff --git a/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch b/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch new file mode 100644 index 0000000..de164a3 --- /dev/null +++ b/SOURCES/0538-afr-fix-coverity-issue-introduced-by-90cefde.patch @@ -0,0 +1,46 @@ +From 31cd7627ff329a39691239322df3bc88e962ad02 Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Mon, 1 Mar 2021 05:19:39 +0100 +Subject: [PATCH 538/538] afr: fix coverity issue introduced by 90cefde + +Fixes coverity issues 1447029 and 1447028. 
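
The underlying pitfall, in standalone toy form (not the dict API): a function that reports failure through its pointer return value must be checked against NULL, because an ordered comparison such as "< 0" on a pointer is meaningless and will never fire:

    #include <stdio.h>

    /* Toy stand-in for a call like dict_copy() that returns a pointer. */
    static int *maybe_fail(int fail)
    {
        static int value = 42;
        return fail ? NULL : &value;
    }

    int main(void)
    {
        int *p = maybe_fail(1);

        /* wrong: a pointer is never "< 0"; the check cannot fire
         * if (p < 0) ...
         */

        if (p == NULL) /* right: NULL is the error convention here */
            puts("call failed");
        return 0;
    }
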
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2201 +> Updates: #2161 +> Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1911292 +Change-Id: I6a564231d6aeb76de20675b7ced5d45eed8c377f +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/229200 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/cluster/afr/src/afr-inode-read.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c +index 98e195a..d874172 100644 +--- a/xlators/cluster/afr/src/afr-inode-read.c ++++ b/xlators/cluster/afr/src/afr-inode-read.c +@@ -918,13 +918,13 @@ afr_update_local_dicts(call_frame_t *frame, dict_t *dict, dict_t *xdata) + } + + if (dict != NULL) { +- if (dict_copy(dict, local->dict) < 0) { ++ if (dict_copy(dict, local->dict) == NULL) { + goto done; + } + } + + if (xdata != NULL) { +- if (dict_copy(xdata, local->xdata_rsp) < 0) { ++ if (dict_copy(xdata, local->xdata_rsp) == NULL) { + goto done; + } + } +-- +1.8.3.1 + diff --git a/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch b/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch new file mode 100644 index 0000000..18f851f --- /dev/null +++ b/SOURCES/0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch @@ -0,0 +1,62 @@ +From 88523814fe296c9cc9f7619e06210830f59c5edf Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez <xhernandez@redhat.com> +Date: Fri, 12 Mar 2021 10:32:09 +0100 +Subject: [PATCH 539/539] extras: disable lookup-optimize in virt and block + groups + +lookup-optimize doesn't provide any benefit for virtualized +environments and gluster-block workloads, but it's known to cause +corruption in some cases when sharding is also enabled and the volume +is expanded or shrunk. + +For this reason, we disable lookup-optimize by default on those +environments. 
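As an illustration of how the new default reaches existing volumes (a sketch, not part of the patch: <VOLNAME> is a placeholder, and it assumes the profiles above are installed as the usual "virt" and "gluster-block" group files under /var/lib/glusterd/groups/):

    # re-apply the profile; it now carries cluster.lookup-optimize=off
    gluster volume set <VOLNAME> group virt

    # confirm the effective value
    gluster volume get <VOLNAME> cluster.lookup-optimize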
+ +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2254 +> Fixes: #2253 +> Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71 +> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> + +BUG: 1939372 +Change-Id: I25861aa50b335556a995a9c33318dd3afb41bf71 +Signed-off-by: Xavi Hernandez <xhernandez@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/231173 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/group-distributed-virt | 1 + + extras/group-gluster-block | 1 + + extras/group-virt.example | 1 + + 3 files changed, 3 insertions(+) + +diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt +index a960b76..6da3de0 100644 +--- a/extras/group-distributed-virt ++++ b/extras/group-distributed-virt +@@ -8,3 +8,4 @@ user.cifs=off + client.event-threads=4 + server.event-threads=4 + performance.client-io-threads=on ++cluster.lookup-optimize=off +diff --git a/extras/group-gluster-block b/extras/group-gluster-block +index 1e39801..b8d3e8d 100644 +--- a/extras/group-gluster-block ++++ b/extras/group-gluster-block +@@ -25,3 +25,4 @@ features.shard-block-size=64MB + user.cifs=off + server.allow-insecure=on + cluster.choose-local=off ++cluster.lookup-optimize=off +diff --git a/extras/group-virt.example b/extras/group-virt.example +index 3a441eb..155f5f5 100644 +--- a/extras/group-virt.example ++++ b/extras/group-virt.example +@@ -21,3 +21,4 @@ server.tcp-user-timeout=20 + server.keepalive-time=10 + server.keepalive-interval=2 + server.keepalive-count=5 ++cluster.lookup-optimize=off +-- +1.8.3.1 + diff --git a/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch b/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch new file mode 100644 index 0000000..0a89c64 --- /dev/null +++ b/SOURCES/0540-extras-Disable-write-behind-for-group-samba.patch @@ -0,0 +1,37 @@ +From 6895b6c67e9c29af3f966b4d9ee5cb40da763d24 Mon Sep 17 00:00:00 2001 +From: Mohit Agrawal <moagrawa@redhat.com> +Date: Wed, 14 Apr 2021 12:38:45 +0530 +Subject: [PATCH 540/540] extras: Disable write-behind for group samba. + +When write-behind is enabled with Samba, it can be a +source of data corruption. The translator, while +processing a write call, immediately returns success but continues +writing the data to the server in the background. This can cause data +corruption when two clients relying on Samba to provide data consistency +are operating on the same file. 
+ +> fixes: https://github.com/gluster/glusterfs/issues/2329 + +Change-Id: I5265056ff315a5f3cd97ea11b18db0831b1b901d +Solution: Disable write-behind for samba group +BUG: 1948547 +Signed-off-by: Mohit Agrawal <moagrawa@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/235876 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + extras/group-samba | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/extras/group-samba b/extras/group-samba +index eeee6e0..9611a1f 100644 +--- a/extras/group-samba ++++ b/extras/group-samba +@@ -9,3 +9,4 @@ performance.nl-cache=on + performance.nl-cache-timeout=600 + performance.readdir-ahead=on + performance.parallel-readdir=on ++performance.write-behind=off +-- +1.8.3.1 + diff --git a/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch b/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch new file mode 100644 index 0000000..29135df --- /dev/null +++ b/SOURCES/0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch @@ -0,0 +1,545 @@ +From 23ab7175e64ab4d75fbcb6874008843cc78b65b8 Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Fri, 16 Apr 2021 18:48:56 +0530 +Subject: [PATCH 541/542] glusterd-volgen: Add functionality to accept any + custom xlator + +Add a new function which allows users to insert any custom xlators. +It provides a way to add custom processing to file operations. + +Users can deploy a plugin (xlator shared object) and integrate it into glusterfsd. + +If users want to enable a custom xlator, they should do the following: + +1. put the xlator object (.so file) into "XLATOR_DIR/user/" +2. set the option user.xlator.<xlator> to an existing xlator-name to specify its position in the graph +3. restart the gluster volume + +Options for a custom xlator can be set via "user.xlator.<xlator>.<optkey>". 
+ +Backport of : +>https://github.com/gluster/glusterfs/commit/ea86b664f3b1f54901ce1b7d7fba7d80456f2089 +>Fixes: https://github.com/gluster/glusterfs/issues/1943 +>Change-Id: Ife3ae1514ea474f5dae2897223012f9d04b64674 +>Signed-off-by:Ryo Furuhashi <ryo.furuhashi.nh@hitachi.com> +>Co-authored-by: Yaniv Kaul <ykaul@redhat.com> +>Co-authored-by: Xavi Hernandez <xhernandez@users.noreply.github.com> + +Change-Id: Ic8f28bfcfde67213eb1092b0ebf4822c874d37bb +BUG: 1927235 +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/236830 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com> +Reviewed-by: Xavi Hernandez Juan <xhernandez@redhat.com> +--- + cli/src/cli-rpc-ops.c | 148 ++++++++++++++++++++------ + cli/src/cli.h | 2 - + tests/basic/user-xlator.t | 65 ++++++++++++ + tests/env.rc.in | 3 + + xlators/mgmt/glusterd/src/glusterd-volgen.c | 155 ++++++++++++++++++++++++++++ + 5 files changed, 342 insertions(+), 31 deletions(-) + create mode 100755 tests/basic/user-xlator.t + +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 4e91265..51b5447 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -2269,49 +2269,131 @@ out: + return ret; + } + +-char * +-is_server_debug_xlator(void *myframe) ++/* ++ * returns ++ * 1 : is server debug xlator ++ * 0 : is not server debug xlator ++ * <0 : error ++ */ ++static int ++is_server_debug_xlator(char *key, char *value) ++{ ++ if (!key || !value) ++ return -1; ++ ++ if (strcmp("debug.trace", key) == 0 || ++ strcmp("debug.error-gen", key) == 0) { ++ if (strcmp("client", value) == 0) ++ return 0; ++ else ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * returns ++ * 1 : is user xlator ++ * 0 : is not user xlator ++ * <0 : error ++ */ ++static int ++is_server_user_xlator(char *key, char *value) ++{ ++ int ret = 0; ++ ++ if (!key || !value) ++ return -1; ++ ++ ret = fnmatch("user.xlator.*", key, 0); ++ if (ret < 0) { ++ ret = -1; ++ goto out; ++ } else if (ret == FNM_NOMATCH) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = fnmatch("user.xlator.*.*", key, 0); ++ if (ret < 0) { ++ ret = -1; ++ goto out; ++ } else if (ret != FNM_NOMATCH) { // this is user xlator's option key ++ ret = 0; ++ goto out; ++ } ++ ++ ret = 1; ++ ++out: ++ return ret; ++} ++ ++static int ++added_server_xlator(void *myframe, char **added_xlator) + { + call_frame_t *frame = NULL; + cli_local_t *local = NULL; + char **words = NULL; + char *key = NULL; + char *value = NULL; +- char *debug_xlator = NULL; ++ int ret = 0; + + frame = myframe; + local = frame->local; + words = (char **)local->words; + + while (*words != NULL) { +- if (strstr(*words, "trace") == NULL && +- strstr(*words, "error-gen") == NULL) { +- words++; +- continue; +- } +- + key = *words; + words++; + value = *words; +- if (value == NULL) ++ ++ if (!value) { + break; +- if (strstr(value, "client")) { +- words++; +- continue; +- } else { +- if (!(strstr(value, "posix") || strstr(value, "acl") || +- strstr(value, "locks") || strstr(value, "io-threads") || +- strstr(value, "marker") || strstr(value, "index"))) { +- words++; +- continue; +- } else { +- debug_xlator = gf_strdup(key); +- break; ++ } ++ ++ ret = is_server_debug_xlator(key, value); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "failed to check that debug xlator was added"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (ret) { ++ *added_xlator = gf_strdup(key); ++ if (!*added_xlator) { ++ 
gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Out of memory"); ++ ret = -1; ++ goto out; ++ } ++ break; ++ } ++ ++ ret = is_server_user_xlator(key, value); ++ if (ret < 0) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "failed to check that user xlator was added"); ++ ret = -1; ++ goto out; ++ } ++ ++ if (ret) { ++ *added_xlator = gf_strdup(key); ++ if (!*added_xlator) { ++ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR, ++ "Out of memory"); ++ ret = -1; ++ goto out; + } ++ break; + } + } + +- return debug_xlator; ++out: ++ return ret; + } + + int +@@ -2327,7 +2409,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + char msg[1024] = { + 0, + }; +- char *debug_xlator = NULL; ++ char *added_xlator = NULL; + char tmp_str[512] = { + 0, + }; +@@ -2365,18 +2447,26 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + * The process has to be restarted. So this is a check from the + * volume set option such that if debug xlators such as trace/errorgen + * are provided in the set command, warn the user. ++ * volume set option such that if user custom xlators or debug ++ * xlators such as trace/errorgen are provided in the set command, ++ * warn the user. + */ +- debug_xlator = is_server_debug_xlator(myframe); ++ ret = added_server_xlator(myframe, &added_xlator); ++ if (ret < 0) { ++ gf_log("cli", GF_LOG_ERROR, ++ "failed to check that server graph has been changed"); ++ goto out; ++ } + + if (dict_get_str(dict, "help-str", &help_str) && !msg[0]) + snprintf(msg, sizeof(msg), "Set volume %s", + (rsp.op_ret) ? "unsuccessful" : "successful"); +- if (rsp.op_ret == 0 && debug_xlator) { ++ if (rsp.op_ret == 0 && added_xlator) { + snprintf(tmp_str, sizeof(tmp_str), + "\n%s translator has been " + "added to the server volume file. Please restart the" + " volume for enabling the translator", +- debug_xlator); ++ added_xlator); + } + + if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) { +@@ -2394,7 +2484,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + cli_err("volume set: failed"); + } else { + if (help_str == NULL) { +- if (debug_xlator == NULL) ++ if (added_xlator == NULL) + cli_out("volume set: success"); + else + cli_out("volume set: success%s", tmp_str); +@@ -2408,7 +2498,7 @@ gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count, + out: + if (dict) + dict_unref(dict); +- GF_FREE(debug_xlator); ++ GF_FREE(added_xlator); + cli_cmd_broadcast_response(ret); + gf_free_xdr_cli_rsp(rsp); + return ret; +diff --git a/cli/src/cli.h b/cli/src/cli.h +index 7b4f446..b5b69ea 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -502,8 +502,6 @@ cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno, + int + cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict, + char *key); +-char * +-is_server_debug_xlator(void *myframe); + + int32_t + cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options, +diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t +new file mode 100755 +index 0000000..a711f9f +--- /dev/null ++++ b/tests/basic/user-xlator.t +@@ -0,0 +1,65 @@ ++#!/bin/bash ++ ++. $(dirname $0)/../include.rc ++. 
$(dirname $0)/../volume.rc ++ ++#### patchy.dev.d-backends-patchy1.vol ++brick=${B0//\//-} ++SERVER_VOLFILE="/var/lib/glusterd/vols/${V0}/${V0}.${H0}.${brick:1}-${V0}1.vol" ++ ++cleanup; ++ ++TEST mkdir -p $B0/single-brick ++TEST mkdir -p ${GLUSTER_XLATOR_DIR}/user ++ ++## deploy dummy user xlator ++TEST cp ${GLUSTER_XLATOR_DIR}/playground/template.so ${GLUSTER_XLATOR_DIR}/user/hoge.so ++ ++TEST glusterd ++TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6}; ++TEST $CLI volume set $V0 user.xlator.hoge posix ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++ ++TEST $CLI volume set $V0 user.xlator.hoge.opt1 10 ++TEST grep -q '"option opt1 10"' ${SERVER_VOLFILE} ++TEST $CLI volume set $V0 user.xlator.hoge.opt2 hogehoge ++TEST grep -q '"option opt2 hogehoge"' ${SERVER_VOLFILE} ++TEST $CLI volume set $V0 user.xlator.hoge.opt3 true ++TEST grep -q '"option opt3 true"' ${SERVER_VOLFILE} ++ ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++TEST $CLI volume set $V0 user.xlator.hoge trash ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++TEST ! $CLI volume set $V0 user.xlator.hoge unknown ++TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified. 
++ ++TEST $CLI volume stop $V0 ++TEST $CLI volume start $V0 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 ++EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 ++ ++#### teardown ++ ++TEST rm -f ${GLUSTER_XLATOR_DIR}/user/hoge.so ++cleanup; +diff --git a/tests/env.rc.in b/tests/env.rc.in +index c7472a7..1f0ca88 100644 +--- a/tests/env.rc.in ++++ b/tests/env.rc.in +@@ -40,3 +40,6 @@ export GLUSTER_LIBEXECDIR + + RUN_NFS_TESTS=@BUILD_GNFS@ + export RUN_NFS_TESTS ++ ++GLUSTER_XLATOR_DIR=@libdir@/glusterfs/@PACKAGE_VERSION@/xlator ++export GLUSTER_XLATOR_DIR +\ No newline at end of file +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 1920284..a242b5c 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -45,6 +45,11 @@ struct gd_validate_reconf_opts { + + extern struct volopt_map_entry glusterd_volopt_map[]; + ++struct check_and_add_user_xlator_t { ++ volgen_graph_t *graph; ++ char *volname; ++}; ++ + #define RPC_SET_OPT(XL, CLI_OPT, XLATOR_OPT, ERROR_CMD) \ + do { \ + char *_value = NULL; \ +@@ -2822,6 +2827,145 @@ out: + return ret; + } + ++static gf_boolean_t ++check_user_xlator_position(dict_t *dict, char *key, data_t *value, ++ void *prev_xlname) ++{ ++ if (strncmp(key, "user.xlator.", SLEN("user.xlator.")) != 0) { ++ return false; ++ } ++ ++ if (fnmatch("user.xlator.*.*", key, 0) == 0) { ++ return false; ++ } ++ ++ char *value_str = data_to_str(value); ++ if (!value_str) { ++ return false; ++ } ++ ++ if (strcmp(value_str, prev_xlname) == 0) { ++ gf_log("glusterd", GF_LOG_INFO, ++ "found insert position of user-xlator(%s)", key); ++ return true; ++ } ++ ++ return false; ++} ++ ++static int ++set_user_xlator_option(dict_t *set_dict, char *key, data_t *value, void *data) ++{ ++ xlator_t *xl = data; ++ char *optname = strrchr(key, '.') + 1; ++ ++ gf_log("glusterd", GF_LOG_DEBUG, "set user xlator option %s = %s", key, ++ value->data); ++ ++ return xlator_set_option(xl, optname, strlen(optname), data_to_str(value)); ++} ++ ++static int ++insert_user_xlator_to_graph(dict_t *set_dict, char *key, data_t *value, ++ void *action_data) ++{ ++ int ret = -1; ++ ++ struct check_and_add_user_xlator_t *data = action_data; ++ ++ char *xlator_name = strrchr(key, '.') + 1; // user.xlator.<xlator_name> ++ char *xlator_option_matcher = NULL; ++ char *type = NULL; ++ xlator_t *xl = NULL; ++ ++ // convert optkey to xlator type ++ if (gf_asprintf(&type, "user/%s", xlator_name) < 0) { ++ gf_log("glusterd", GF_LOG_ERROR, "failed to generate user-xlator type"); ++ goto out; ++ } ++ ++ gf_log("glusterd", GF_LOG_INFO, "add user xlator=%s to graph", type); ++ ++ xl = volgen_graph_add(data->graph, type, data->volname); ++ if (!xl) { ++ goto out; ++ } ++ ++ ret = gf_asprintf(&xlator_option_matcher, "user.xlator.%s.*", xlator_name); ++ if (ret < 0) { ++ gf_log("glusterd", GF_LOG_ERROR, ++ "failed to generate user-xlator option matcher"); ++ goto out; ++ } ++ ++ dict_foreach_fnmatch(set_dict, xlator_option_matcher, ++ set_user_xlator_option, xl); ++ ++out: ++ if (type) ++ GF_FREE(type); ++ if (xlator_option_matcher) ++ GF_FREE(xlator_option_matcher); ++ ++ 
return ret; ++} ++ ++static int ++validate_user_xlator_position(dict_t *this, char *key, data_t *value, ++ void *unused) ++{ ++ int ret = -1; ++ int i = 0; ++ ++ if (!value) ++ goto out; ++ ++ if (fnmatch("user.xlator.*.*", key, 0) == 0) { ++ ret = 0; ++ goto out; ++ } ++ ++ char *value_str = data_to_str(value); ++ if (!value_str) ++ goto out; ++ ++ int num_xlators = sizeof(server_graph_table) / ++ sizeof(server_graph_table[0]); ++ for (i = 0; i < num_xlators; i++) { ++ if (server_graph_table[i].dbg_key && ++ strcmp(value_str, server_graph_table[i].dbg_key) == 0) { ++ ret = 0; ++ goto out; ++ } ++ } ++ ++out: ++ if (ret == -1) ++ gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s", ++ key, value->data); ++ ++ return ret; ++} ++ ++static int ++check_and_add_user_xl(volgen_graph_t *graph, dict_t *set_dict, char *volname, ++ char *prev_xlname) ++{ ++ if (!prev_xlname) ++ goto out; ++ ++ struct check_and_add_user_xlator_t data = {.graph = graph, ++ .volname = volname}; ++ ++ if (dict_foreach_match(set_dict, check_user_xlator_position, prev_xlname, ++ insert_user_xlator_to_graph, &data) < 0) { ++ return -1; ++ } ++ ++out: ++ return 0; ++} ++ + static int + server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +@@ -2831,6 +2975,12 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + char *loglevel = NULL; + int i = 0; + ++ if (dict_foreach_fnmatch(set_dict, "user.xlator.*", ++ validate_user_xlator_position, NULL) < 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ + i = sizeof(server_graph_table) / sizeof(server_graph_table[0]) - 1; + + while (i >= 0) { +@@ -2848,6 +2998,11 @@ server_graph_builder(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + if (ret) + goto out; + ++ ret = check_and_add_user_xl(graph, set_dict, volinfo->volname, ++ server_graph_table[i].dbg_key); ++ if (ret) ++ goto out; ++ + i--; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch b/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch new file mode 100644 index 0000000..f6e0641 --- /dev/null +++ b/SOURCES/0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch @@ -0,0 +1,64 @@ +From f3db0c99faf813e0f2e9ffcf599416555a59df1f Mon Sep 17 00:00:00 2001 +From: Ashish Pandey <aspandey@redhat.com> +Date: Tue, 9 Feb 2021 16:43:35 +0530 +Subject: [PATCH 542/542] xlaotrs/mgmt: Fixing coverity issue 1445996 + +Backport of https://github.com/gluster/glusterfs/pull/2148/commits/9785e96e0bdf6e60896570fdf5e4a6976a6f60ba + +Fixing "Null pointer dereferences" + +BUG: 1927235 +Change-Id: Idbc014e1302d2450f97bccd028681198c0d97424 +Signed-off-by: Ashish Pandey <aspandey@redhat.com> +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/237433 +Tested-by: RHGS Build Bot <nigelb@redhat.com> +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + xlators/mgmt/glusterd/src/glusterd-volgen.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index a242b5c..71aed08 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -2916,21 +2916,23 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value, + { + int ret = -1; + int i = 0; ++ char *value_str = NULL; + + if (!value) + goto out; + ++ value_str = data_to_str(value); ++ if (!value_str) 
++ goto out; ++ + if (fnmatch("user.xlator.*.*", key, 0) == 0) { + ret = 0; + goto out; + } + +- char *value_str = data_to_str(value); +- if (!value_str) +- goto out; +- + int num_xlators = sizeof(server_graph_table) / + sizeof(server_graph_table[0]); ++ + for (i = 0; i < num_xlators; i++) { + if (server_graph_table[i].dbg_key && + strcmp(value_str, server_graph_table[i].dbg_key) == 0) { +@@ -2942,7 +2944,7 @@ validate_user_xlator_position(dict_t *this, char *key, data_t *value, + out: + if (ret == -1) + gf_log("glusterd", GF_LOG_ERROR, "invalid user xlator position %s = %s", +- key, value->data); ++ key, value_str); + + return ret; + } +-- +1.8.3.1 + diff --git a/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch b/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch new file mode 100644 index 0000000..c6194c7 --- /dev/null +++ b/SOURCES/0543-glusterd-handle-custom-xlator-failure-cases.patch @@ -0,0 +1,162 @@ +From 71fc5b7949e00c4448f5ec1291e756b201a70082 Mon Sep 17 00:00:00 2001 +From: Ravishankar N <ravishankar@redhat.com> +Date: Thu, 29 Apr 2021 18:34:57 +0530 +Subject: [PATCH 543/543] glusterd: handle custom xlator failure cases + +Problem-1: +custom xlator insertion was failing for those xlators in the brick graph +whose dbg_key was NULL in the server_graph_table. Looking at the git log, +the dbg_key was added in commit d1397dbd7d6cdbd2d81d5d36d608b6175d449db4 +for inserting debug xlators. + +Fix: I think it is fine to define it for all brick xlators below server. + +Problem-2: +In the commit-op phase, glusterd_op_set_volume() updates the volinfo +dict with the key-value pairs and then proceeds to create the volfiles. +If any of the steps fail, the volinfo dict retains those key-values, +until glusterd is restarted or `gluster vol reset $VOLNAME` is issued. + +Fix: +Make a copy of the volinfo dict and if there are any failures in +proceeding with the set volume logic, restore the dict to its original +state. + +Backport of: +> Upstream-patch-link: https://github.com/gluster/glusterfs/pull/2371 +> Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849 +> Updates: #2370 +> Signed-off-by: Ravishankar N <ravishankar@redhat.com> + +Change-Id: I9010dab33d0139b8e6d603308e331b6d220a4849 +BUG: 1953901 +Signed-off-by: Ravishankar N <ravishankar@redhat.com> +Reviewed-on: https://code.engineering.redhat.com/gerrit/239889 +Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> +--- + tests/basic/user-xlator.t | 16 ++++++++++++++-- + xlators/mgmt/glusterd/src/glusterd-op-sm.c | 16 ++++++++++++++++ + xlators/mgmt/glusterd/src/glusterd-volgen.c | 14 +++++++------- + 3 files changed, 37 insertions(+), 9 deletions(-) + +diff --git a/tests/basic/user-xlator.t b/tests/basic/user-xlator.t +index a711f9f..ed2d831 100755 +--- a/tests/basic/user-xlator.t ++++ b/tests/basic/user-xlator.t +@@ -35,8 +35,18 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 + +-TEST $CLI volume set $V0 user.xlator.hoge trash +-TEST grep -q 'user/hoge' ${SERVER_VOLFILE} ++# Test that the insertion at all positions between server and posix is successful. ++# It is not guaranteed that the brick process will start/work in all positions though. 
++TESTS_EXPECTED_IN_LOOP=34 ++declare -a brick_side_xlators=("decompounder" "io-stats" "quota" "index" "barrier" ++ "marker" "selinux" "io-threads" "upcall" "leases" ++ "read-only" "worm" "locks" "access-control" ++ "bitrot-stub" "changelog" "trash") ++for xlator in "${brick_side_xlators[@]}" ++ do ++ TEST_IN_LOOP $CLI volume set $V0 user.xlator.hoge $xlator ++ TEST_IN_LOOP grep -q 'user/hoge' ${SERVER_VOLFILE} ++ done + + TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 +@@ -49,6 +59,8 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}6 + + TEST ! $CLI volume set $V0 user.xlator.hoge unknown + TEST grep -q 'user/hoge' ${SERVER_VOLFILE} # When the CLI fails, the volfile is not modified. ++# User xlator insert failures must not prevent setting other volume options. ++TEST $CLI volume set $V0 storage.reserve 10% + + TEST $CLI volume stop $V0 + TEST $CLI volume start $V0 +diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +index 1e84f5f..893af29 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c ++++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c +@@ -2911,6 +2911,7 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + uint32_t new_op_version = 0; + gf_boolean_t quorum_action = _gf_false; + glusterd_svc_t *svc = NULL; ++ dict_t *volinfo_dict_orig = NULL; + + this = THIS; + GF_ASSERT(this); +@@ -2918,6 +2919,10 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + priv = this->private; + GF_ASSERT(priv); + ++ volinfo_dict_orig = dict_new(); ++ if (!volinfo_dict_orig) ++ goto out; ++ + ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +@@ -2949,6 +2954,11 @@ glusterd_op_set_volume(dict_t *dict, char **errstr) + goto out; + } + ++ if (dict_copy(volinfo->dict, volinfo_dict_orig) == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ + /* TODO: Remove this once v3.3 compatibility is not required */ + check_op_version = dict_get_str_boolean(dict, "check-op-version", + _gf_false); +@@ -3171,6 +3181,12 @@ out: + gf_msg_debug(this->name, 0, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action(); ++ if (ret < 0 && count > 1) { ++ if (dict_reset(volinfo->dict) == 0) ++ dict_copy(volinfo_dict_orig, volinfo->dict); ++ } ++ if (volinfo_dict_orig) ++ dict_unref(volinfo_dict_orig); + return ret; + } + +diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c +index 71aed08..aa85bdb 100644 +--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c ++++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c +@@ -2706,24 +2706,24 @@ out: + static volgen_brick_xlator_t server_graph_table[] = { + {brick_graph_add_server, NULL}, + {brick_graph_add_decompounder, "decompounder"}, +- {brick_graph_add_io_stats, "NULL"}, ++ {brick_graph_add_io_stats, "io-stats"}, + {brick_graph_add_sdfs, "sdfs"}, + {brick_graph_add_namespace, "namespace"}, +- {brick_graph_add_cdc, NULL}, ++ {brick_graph_add_cdc, "cdc" }, + {brick_graph_add_quota, "quota"}, + {brick_graph_add_index, "index"}, +- {brick_graph_add_barrier, NULL}, ++ {brick_graph_add_barrier, "barrier" }, + {brick_graph_add_marker, "marker"}, + {brick_graph_add_selinux, "selinux"}, + {brick_graph_add_fdl, "fdl"}, + {brick_graph_add_iot, "io-threads"}, + {brick_graph_add_upcall, "upcall"}, + {brick_graph_add_leases, "leases"}, +- {brick_graph_add_pump, NULL}, +- {brick_graph_add_ro, NULL}, +- {brick_graph_add_worm, NULL}, ++ {brick_graph_add_pump, "pump" 
}, ++ {brick_graph_add_ro, "read-only" }, ++ {brick_graph_add_worm, "worm" }, + {brick_graph_add_locks, "locks"}, +- {brick_graph_add_acl, "acl"}, ++ {brick_graph_add_acl, "access-control"}, + {brick_graph_add_bitrot_stub, "bitrot-stub"}, + {brick_graph_add_changelog, "changelog"}, + #if USE_GFDB /* changetimerecorder depends on gfdb */ +-- +1.8.3.1 + diff --git a/SOURCES/0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch b/SOURCES/0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch new file mode 100644 index 0000000..87f7be7 --- /dev/null +++ b/SOURCES/0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch @@ -0,0 +1,2389 @@ +From e0ee7d755bc90c1f829e4ec3b0d5dfd9f9d480b9 Mon Sep 17 00:00:00 2001 +From: Tamar Shacked <tshacked@redhat.com> +Date: Mon, 26 Jul 2021 16:59:28 +0300 +Subject: rhel-9.0-beta build: fixing gcc-10 and LTO errors + +libgfapi symbol versions break LTO in Fedora rawhide/f33 +Upstream: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/24666/ +> fixes: #1352 +> Change-Id: I05fda580afacfff1bfc07be810dd1afc08a92fb8 +> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> + +Duplicate defns of cli_default_conn_timeout and cli_ten_minutes_timeout +Upstream: +> Reviewed-on: https://review.gluster.org/#/c/glusterfs/+/23956/ +> Change-Id: I54ea485736a4910254eeb21222ad263721cdef3c +> Fixes: bz#1193929 +> Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com> + +BUG: 1939340 + +Signed-off-by: Tamar Shacked <tshacked@redhat.com> +Change-Id: If40e4733d12713d2821653cdbce7b6e931f12140 +--- + api/src/glfs-fops.c | 322 ++++++++++++------------------------ + api/src/glfs-handleops.c | 99 ++++------- + api/src/glfs-internal.h | 27 ++- + api/src/glfs-mgmt.c | 3 +- + api/src/glfs-resolve.c | 14 +- + api/src/glfs.c | 99 +++++------ + cli/src/cli-cmd-global.c | 1 - + cli/src/cli-cmd-misc.c | 4 - + cli/src/cli-cmd-peer.c | 4 - + cli/src/cli-cmd-snapshot.c | 2 - + cli/src/cli-cmd-system.c | 4 - + cli/src/cli-cmd-volume.c | 4 - + cli/src/cli-quotad-client.c | 3 - + cli/src/cli-rpc-ops.c | 5 +- + cli/src/cli.c | 4 + + cli/src/cli.h | 10 +- + 16 files changed, 223 insertions(+), 382 deletions(-) + +diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c +index 6dc3b66c8..e772aa77a 100644 +--- a/api/src/glfs-fops.c ++++ b/api/src/glfs-fops.c +@@ -292,6 +292,7 @@ glfs_iatt_to_statx(struct glfs *fs, const struct iatt *iatt, + statx->glfs_st_attributes_mask = 0; + } + ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0) + void + priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx) + { +@@ -371,7 +372,6 @@ priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx) + iatt->ia_attributes = statx->glfs_st_attributes; + iatt->ia_attributes_mask = statx->glfs_st_attributes_mask; + } +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0); + + void + glfsflags_from_gfapiflags(struct glfs_stat *stat, int *glvalid) +@@ -415,6 +415,7 @@ glfs_loc_unlink(loc_t *loc) + return 0; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0) + struct glfs_fd * + pub_glfs_open(struct glfs *fs, const char *path, int flags) + { +@@ -509,8 +510,7 @@ invalid_fs: + return glfd; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0) + int + pub_glfs_close(struct glfs_fd *glfd) + { +@@ -565,8 +565,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0) + int + pub_glfs_lstat(struct glfs *fs, const char *path, struct stat 
*stat) + { +@@ -607,8 +606,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0) + int + pub_glfs_stat(struct glfs *fs, const char *path, struct stat *stat) + { +@@ -649,8 +647,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0); +- ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0) + int + priv_glfs_statx(struct glfs *fs, const char *path, const unsigned int mask, + struct glfs_stat *statxbuf) +@@ -704,8 +701,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0) + int + pub_glfs_fstat(struct glfs_fd *glfd, struct stat *stat) + { +@@ -754,8 +750,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0) + struct glfs_fd * + pub_glfs_creat(struct glfs *fs, const char *path, int flags, mode_t mode) + { +@@ -902,8 +897,6 @@ invalid_fs: + return glfd; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0); +- + #ifdef HAVE_SEEK_HOLE + static int + glfs_seek(struct glfs_fd *glfd, off_t offset, int whence) +@@ -957,6 +950,7 @@ out: + } + #endif + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0) + off_t + pub_glfs_lseek(struct glfs_fd *glfd, off_t offset, int whence) + { +@@ -1012,8 +1006,6 @@ invalid_fs: + return -1; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0); +- + static ssize_t + glfs_preadv_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + off_t offset, int flags, struct glfs_stat *poststat) +@@ -1091,6 +1083,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0) + ssize_t + pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + off_t offset, int flags) +@@ -1098,8 +1091,7 @@ pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + return glfs_preadv_common(glfd, iovec, iovcnt, offset, flags, NULL); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0) + ssize_t + pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags) + { +@@ -1116,8 +1108,7 @@ pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0) + ssize_t + pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset, + int flags) +@@ -1135,8 +1126,7 @@ pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0) + ssize_t + pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset, + int flags, struct glfs_stat *poststat) +@@ -1154,8 +1144,7 @@ pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0) + ssize_t + pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count, + int flags) +@@ -1167,8 +1156,6 @@ pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0); +- + struct glfs_io { + struct glfs_fd *glfd; + int op; +@@ -1370,6 +1357,7 @@ invalid_fs: + return -1; + } + ++GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0) + 
int + pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec, + int count, off_t offset, int flags, glfs_io_cbk34 fn, +@@ -1379,8 +1367,7 @@ pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec, + (void *)fn, data); + } + +-GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0) + int + pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec, + int count, off_t offset, int flags, glfs_io_cbk fn, +@@ -1390,8 +1377,7 @@ pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec, + _gf_false, fn, data); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0) + int + pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags, + glfs_io_cbk34 fn, void *data) +@@ -1410,8 +1396,7 @@ pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0) + int + pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags, + glfs_io_cbk fn, void *data) +@@ -1430,8 +1415,7 @@ pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0) + int + pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count, + off_t offset, int flags, glfs_io_cbk34 fn, void *data) +@@ -1450,8 +1434,7 @@ pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0) + int + pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count, + off_t offset, int flags, glfs_io_cbk fn, void *data) +@@ -1470,8 +1453,7 @@ pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0) + int + pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count, + int flags, glfs_io_cbk34 fn, void *data) +@@ -1483,8 +1465,7 @@ pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0) + int + pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count, + int flags, glfs_io_cbk fn, void *data) +@@ -1496,8 +1477,6 @@ pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0); +- + static ssize_t + glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + off_t offset, int flags, struct glfs_stat *prestat, +@@ -1591,6 +1570,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0) + ssize_t + pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, + struct glfs_fd *glfd_out, off64_t *off_out, size_t len, +@@ -1744,8 +1724,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0) + ssize_t + pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec 
*iovec, int iovcnt, + off_t offset, int flags) +@@ -1753,8 +1732,7 @@ pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, + return glfs_pwritev_common(glfd, iovec, iovcnt, offset, flags, NULL, NULL); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0) + ssize_t + pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags) + { +@@ -1771,8 +1749,7 @@ pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0) + ssize_t + pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count, + int flags) +@@ -1784,8 +1761,7 @@ pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0) + ssize_t + pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count, + off_t offset, int flags) +@@ -1803,8 +1779,7 @@ pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0) + ssize_t + pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count, + off_t offset, int flags, struct glfs_stat *prestat, +@@ -1823,8 +1798,6 @@ pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0); +- + extern glfs_t * + pub_glfs_from_glfd(glfs_fd_t *); + +@@ -1943,6 +1916,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0) + int + pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec, + int count, off_t offset, int flags, glfs_io_cbk34 fn, +@@ -1952,8 +1926,7 @@ pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec, + _gf_true, (void *)fn, data); + } + +-GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0) + int + pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec, + int count, off_t offset, int flags, glfs_io_cbk fn, +@@ -1963,8 +1936,7 @@ pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec, + _gf_false, fn, data); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0) + int + pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count, + int flags, glfs_io_cbk34 fn, void *data) +@@ -1983,8 +1955,7 @@ pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0) + int + pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count, + int flags, glfs_io_cbk fn, void *data) +@@ -2003,8 +1974,7 @@ pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0) + int + pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count, + off_t offset, int flags, glfs_io_cbk34 fn, void *data) +@@ -2023,8 +1993,7 @@ pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count, 
+ return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0) + int + pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count, + off_t offset, int flags, glfs_io_cbk fn, void *data) +@@ -2043,8 +2012,7 @@ pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0) + int + pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov, + int count, int flags, glfs_io_cbk34 fn, void *data) +@@ -2056,8 +2024,7 @@ pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov, + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0) + int + pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count, + int flags, glfs_io_cbk fn, void *data) +@@ -2069,8 +2036,6 @@ pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0); +- + static int + glfs_fsync_common(struct glfs_fd *glfd, struct glfs_stat *prestat, + struct glfs_stat *poststat) +@@ -2135,14 +2100,14 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0) + int + pub_glfs_fsync34(struct glfs_fd *glfd) + { + return glfs_fsync_common(glfd, NULL, NULL); + } + +-GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0) + int + pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat, + struct glfs_stat *poststat) +@@ -2150,8 +2115,6 @@ pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat, + return glfs_fsync_common(glfd, prestat, poststat); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0); +- + static int + glfs_fsync_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +@@ -2232,6 +2195,7 @@ out: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0) + int + pub_glfs_fsync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data) + { +@@ -2248,8 +2212,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0) + int + pub_glfs_fsync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data) + { +@@ -2266,8 +2229,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0); +- + static int + glfs_fdatasync_common(struct glfs_fd *glfd, struct glfs_stat *prestat, + struct glfs_stat *poststat) +@@ -2332,14 +2293,14 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0) + int + pub_glfs_fdatasync34(struct glfs_fd *glfd) + { + return glfs_fdatasync_common(glfd, NULL, NULL); + } + +-GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0) + int + pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat, + struct glfs_stat *poststat) +@@ -2347,8 +2308,7 @@ pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat, + return glfs_fdatasync_common(glfd, prestat, poststat); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0); +- ++GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0) + int + 
pub_glfs_fdatasync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data) + { +@@ -2365,8 +2325,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0) + int + pub_glfs_fdatasync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data) + { +@@ -2383,8 +2342,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0); +- + static int + glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset, + struct glfs_stat *prestat, struct glfs_stat *poststat) +@@ -2450,14 +2407,14 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0) + int + pub_glfs_ftruncate34(struct glfs_fd *glfd, off_t offset) + { + return glfs_ftruncate_common(glfd, offset, NULL, NULL); + } + +-GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0) + int + pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset, + struct glfs_stat *prestat, struct glfs_stat *poststat) +@@ -2465,8 +2422,7 @@ pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset, + return glfs_ftruncate_common(glfd, offset, prestat, poststat); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15) + int + pub_glfs_truncate(struct glfs *fs, const char *path, off_t length) + { +@@ -2512,8 +2468,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15); +- + static int + glfs_ftruncate_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +@@ -2606,6 +2560,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0) + int + pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn, + void *data) +@@ -2614,8 +2569,7 @@ pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn, + data); + } + +-GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0) + int + pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn, + void *data) +@@ -2623,8 +2577,7 @@ pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn, + return glfs_ftruncate_async_common(glfd, offset, _gf_false, fn, data); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0) + int + pub_glfs_access(struct glfs *fs, const char *path, int mode) + { +@@ -2670,8 +2623,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0) + int + pub_glfs_symlink(struct glfs *fs, const char *data, const char *path) + { +@@ -2761,8 +2713,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0) + int + pub_glfs_readlink(struct glfs *fs, const char *path, char *buf, size_t bufsiz) + { +@@ -2819,8 +2770,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0) + int + pub_glfs_mknod(struct glfs *fs, const char *path, mode_t mode, dev_t dev) + { +@@ -2910,8 +2860,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0) + int + 
pub_glfs_mkdir(struct glfs *fs, const char *path, mode_t mode) + { +@@ -3001,8 +2950,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0) + int + pub_glfs_unlink(struct glfs *fs, const char *path) + { +@@ -3058,8 +3006,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0) + int + pub_glfs_rmdir(struct glfs *fs, const char *path) + { +@@ -3114,8 +3061,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0) + int + pub_glfs_rename(struct glfs *fs, const char *oldpath, const char *newpath) + { +@@ -3204,8 +3150,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0) + int + pub_glfs_link(struct glfs *fs, const char *oldpath, const char *newpath) + { +@@ -3291,8 +3236,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0) + struct glfs_fd * + pub_glfs_opendir(struct glfs *fs, const char *path) + { +@@ -3373,8 +3317,7 @@ invalid_fs: + return glfd; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0) + int + pub_glfs_closedir(struct glfs_fd *glfd) + { +@@ -3395,16 +3338,14 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0) + long + pub_glfs_telldir(struct glfs_fd *fd) + { + return fd->offset; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0) + void + pub_glfs_seekdir(struct glfs_fd *fd, long offset) + { +@@ -3433,8 +3374,6 @@ pub_glfs_seekdir(struct glfs_fd *fd, long offset) + */ + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0); +- + static int + glfs_discard_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, +@@ -3525,6 +3464,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0) + int + pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len, + glfs_io_cbk34 fn, void *data) +@@ -3533,8 +3473,7 @@ pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len, + data); + } + +-GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0) + int + pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len, + glfs_io_cbk fn, void *data) +@@ -3542,8 +3481,6 @@ pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len, + return glfs_discard_async_common(glfd, offset, len, _gf_false, fn, data); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0); +- + static int + glfs_zerofill_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, +@@ -3636,6 +3573,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0) + int + pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len, + glfs_io_cbk34 fn, void *data) +@@ -3644,8 +3582,7 @@ pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len, + data); + } + +-GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0) 
+ int + pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len, + glfs_io_cbk fn, void *data) +@@ -3653,8 +3590,6 @@ pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len, + return glfs_zerofill_async_common(glfd, offset, len, _gf_false, fn, data); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0); +- + void + gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent) + { +@@ -3814,6 +3749,7 @@ unlock: + return buf; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0) + int + pub_glfs_readdirplus_r(struct glfs_fd *glfd, struct stat *stat, + struct dirent *ext, struct dirent **res) +@@ -3869,8 +3805,7 @@ invalid_fs: + return -1; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0) + int + pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf, + struct dirent **res) +@@ -3878,8 +3813,7 @@ pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf, + return pub_glfs_readdirplus_r(glfd, 0, buf, res); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0) + struct dirent * + pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat) + { +@@ -3893,16 +3827,14 @@ pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat) + return res; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0) + struct dirent * + pub_glfs_readdir(struct glfs_fd *glfd) + { + return pub_glfs_readdirplus(glfd, NULL); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0) + int + pub_glfs_statvfs(struct glfs *fs, const char *path, struct statvfs *buf) + { +@@ -3948,8 +3880,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0) + int + pub_glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat, + int follow) +@@ -4009,8 +3940,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0) + int + pub_glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat) + { +@@ -4063,8 +3993,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0) + int + pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode) + { +@@ -4081,8 +4010,7 @@ pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0) + int + pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode) + { +@@ -4099,8 +4027,7 @@ pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0) + int + pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid) + { +@@ -4125,8 +4052,7 @@ pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0) + int + pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid) + { +@@ -4151,8 +4077,7 @@ pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0); +- 
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0) + int + pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid) + { +@@ -4177,8 +4102,7 @@ pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0) + int + pub_glfs_utimens(struct glfs *fs, const char *path, + const struct timespec times[2]) +@@ -4198,8 +4122,7 @@ pub_glfs_utimens(struct glfs *fs, const char *path, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0) + int + pub_glfs_lutimens(struct glfs *fs, const char *path, + const struct timespec times[2]) +@@ -4219,8 +4142,7 @@ pub_glfs_lutimens(struct glfs *fs, const char *path, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0) + int + pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2]) + { +@@ -4239,8 +4161,6 @@ pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2]) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0); +- + int + glfs_getxattr_process(void *value, size_t size, dict_t *xattr, const char *name) + { +@@ -4340,6 +4260,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0) + ssize_t + pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name, + void *value, size_t size) +@@ -4347,8 +4268,7 @@ pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name, + return glfs_getxattr_common(fs, path, name, value, size, 1); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0) + ssize_t + pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name, + void *value, size_t size) +@@ -4356,8 +4276,7 @@ pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name, + return glfs_getxattr_common(fs, path, name, value, size, 0); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0) + ssize_t + pub_glfs_fgetxattr(struct glfs_fd *glfd, const char *name, void *value, + size_t size) +@@ -4420,8 +4339,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0); +- + int + glfs_listxattr_process(void *value, size_t size, dict_t *xattr) + { +@@ -4505,22 +4422,21 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0) + ssize_t + pub_glfs_listxattr(struct glfs *fs, const char *path, void *value, size_t size) + { + return glfs_listxattr_common(fs, path, value, size, 1); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0) + ssize_t + pub_glfs_llistxattr(struct glfs *fs, const char *path, void *value, size_t size) + { + return glfs_listxattr_common(fs, path, value, size, 0); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0) + ssize_t + pub_glfs_flistxattr(struct glfs_fd *glfd, void *value, size_t size) + { +@@ -4570,8 +4486,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0); +- + int + glfs_setxattr_common(struct glfs *fs, const char *path, const char *name, + const void *value, size_t size, int flags, int follow) +@@ -4651,6 +4565,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0) + int + 
pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name, + const void *value, size_t size, int flags) +@@ -4658,8 +4573,7 @@ pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name, + return glfs_setxattr_common(fs, path, name, value, size, flags, 1); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0) + int + pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name, + const void *value, size_t size, int flags) +@@ -4667,8 +4581,7 @@ pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name, + return glfs_setxattr_common(fs, path, name, value, size, flags, 0); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0) + int + pub_glfs_fsetxattr(struct glfs_fd *glfd, const char *name, const void *value, + size_t size, int flags) +@@ -4743,8 +4656,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0); +- + int + glfs_removexattr_common(struct glfs *fs, const char *path, const char *name, + int follow) +@@ -4795,22 +4706,21 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0) + int + pub_glfs_removexattr(struct glfs *fs, const char *path, const char *name) + { + return glfs_removexattr_common(fs, path, name, 1); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0) + int + pub_glfs_lremovexattr(struct glfs *fs, const char *path, const char *name) + { + return glfs_removexattr_common(fs, path, name, 0); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0) + int + pub_glfs_fremovexattr(struct glfs_fd *glfd, const char *name) + { +@@ -4853,8 +4763,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0) + int + pub_glfs_fallocate(struct glfs_fd *glfd, int keep_size, off_t offset, + size_t len) +@@ -4905,8 +4814,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0) + int + pub_glfs_discard(struct glfs_fd *glfd, off_t offset, size_t len) + { +@@ -4956,8 +4864,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0) + int + pub_glfs_zerofill(struct glfs_fd *glfd, off_t offset, off_t len) + { +@@ -5005,8 +4912,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0) + int + pub_glfs_chdir(struct glfs *fs, const char *path) + { +@@ -5056,8 +4962,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0) + int + pub_glfs_fchdir(struct glfs_fd *glfd) + { +@@ -5109,8 +5014,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0); +- + static gf_boolean_t warn_realpath = _gf_true; /* log once */ + + static char * +@@ -5193,22 +5096,21 @@ invalid_fs: + return retpath; + } + ++GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0) + char * + pub_glfs_realpath34(struct glfs *fs, const char *path, char *resolved_path) + { + return glfs_realpath_common(fs, path, resolved_path, _gf_true); + } + +-GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0); +- 
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17) + char * + pub_glfs_realpath(struct glfs *fs, const char *path, char *resolved_path) + { + return glfs_realpath_common(fs, path, resolved_path, _gf_false); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0) + char * + pub_glfs_getcwd(struct glfs *fs, char *buf, size_t n) + { +@@ -5257,8 +5159,6 @@ invalid_fs: + return buf; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0); +- + static void + gf_flock_to_flock(struct gf_flock *gf_flock, struct flock *flock) + { +@@ -5367,6 +5267,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0) + int + pub_glfs_file_lock(struct glfs_fd *glfd, int cmd, struct flock *flock, + glfs_lock_mode_t lk_mode) +@@ -5404,16 +5305,14 @@ out: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0) + int + pub_glfs_posix_lock(struct glfs_fd *glfd, int cmd, struct flock *flock) + { + return glfs_lock_common(glfd, cmd, flock, NULL); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7) + int + pub_glfs_fd_set_lkowner(struct glfs_fd *glfd, void *data, int len) + { +@@ -5449,8 +5348,8 @@ out: + invalid_fs: + return ret; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0) + struct glfs_fd * + pub_glfs_dup(struct glfs_fd *glfd) + { +@@ -5501,8 +5400,6 @@ invalid_fs: + return dupfd; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0); +- + static void + glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data) + { +@@ -5986,6 +5883,7 @@ out: + * Otherwise all the upcall events are queued up in a list + * to be read/polled by the applications. + */ ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0) + void + priv_glfs_process_upcall_event(struct glfs *fs, void *data) + { +@@ -6053,7 +5951,6 @@ out: + err: + return; + } +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0); + + ssize_t + glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object, +@@ -6241,6 +6138,7 @@ glfs_release_xreaddirp_stat(void *ptr) + * Given glfd of a directory, this function does readdirp and returns + * xstat along with dirents. 
+ */ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0) + int + pub_glfs_xreaddirplus_r(struct glfs_fd *glfd, uint32_t flags, + struct glfs_xreaddirp_stat **xstat_p, +@@ -6349,8 +6247,8 @@ out: + invalid_fs: + return -1; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0) + struct stat * + pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat) + { +@@ -6366,7 +6264,6 @@ pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat) + out: + return NULL; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0); + + void + gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease) +@@ -6386,6 +6283,7 @@ glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease) + memcpy(gf_lease->lease_id, lease->lease_id, LEASE_ID_SIZE); + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0) + int + pub_glfs_lease(struct glfs_fd *glfd, struct glfs_lease *lease, + glfs_recall_cbk fn, void *data) +@@ -6487,5 +6385,3 @@ out: + invalid_fs: + return ret; + } +- +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0); +diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c +index 7b8ff1468..4315a34d2 100644 +--- a/api/src/glfs-handleops.c ++++ b/api/src/glfs-handleops.c +@@ -60,6 +60,7 @@ glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt, + return; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4) + struct glfs_object * + pub_glfs_h_lookupat(struct glfs *fs, struct glfs_object *parent, + const char *path, struct stat *stat, int follow) +@@ -126,8 +127,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4); +- ++GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2) + struct glfs_object * + pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent, + const char *path, struct stat *stat) +@@ -135,8 +135,7 @@ pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent, + return pub_glfs_h_lookupat(fs, parent, path, stat, 0); + } + +-GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0) + int + pub_glfs_h_statfs(struct glfs *fs, struct glfs_object *object, + struct statvfs *statvfs) +@@ -194,8 +193,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2) + int + pub_glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat) + { +@@ -259,8 +257,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2) + int + pub_glfs_h_getattrs(struct glfs *fs, struct glfs_object *object, + struct stat *stat) +@@ -317,8 +314,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2); +- + int + glfs_h_getxattrs_common(struct glfs *fs, struct glfs_object *object, + dict_t **xattr, const char *name, +@@ -380,6 +375,7 @@ out: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1) + int + pub_glfs_h_getxattrs(struct glfs *fs, struct glfs_object *object, + const char *name, void *value, size_t size) +@@ -416,8 +412,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2) + int + pub_glfs_h_setattrs(struct glfs *fs, struct glfs_object *object, + struct stat *stat, int valid) +@@ -480,8 +475,7 @@ invalid_fs: + 
return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0) + int + pub_glfs_h_setxattrs(struct glfs *fs, struct glfs_object *object, + const char *name, const void *value, size_t size, +@@ -568,8 +562,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1) + int + pub_glfs_h_removexattrs(struct glfs *fs, struct glfs_object *object, + const char *name) +@@ -626,8 +619,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2) + struct glfs_fd * + pub_glfs_h_open(struct glfs *fs, struct glfs_object *object, int flags) + { +@@ -727,8 +719,7 @@ invalid_fs: + return glfd; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2) + struct glfs_object * + pub_glfs_h_creat(struct glfs *fs, struct glfs_object *parent, const char *path, + int flags, mode_t mode, struct stat *stat) +@@ -840,7 +831,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2); ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 3.4.2); + + struct glfs_object * + pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, +@@ -975,8 +966,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2) + struct glfs_object * + pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, struct stat *stat) +@@ -1074,8 +1064,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2) + struct glfs_object * + pub_glfs_h_mknod(struct glfs *fs, struct glfs_object *parent, const char *path, + mode_t mode, dev_t dev, struct stat *stat) +@@ -1172,8 +1161,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2) + int + pub_glfs_h_unlink(struct glfs *fs, struct glfs_object *parent, const char *path) + { +@@ -1244,8 +1232,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2) + struct glfs_fd * + pub_glfs_h_opendir(struct glfs *fs, struct glfs_object *object) + { +@@ -1327,8 +1314,7 @@ invalid_fs: + return glfd; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0) + int + pub_glfs_h_access(struct glfs *fs, struct glfs_object *object, int mask) + { +@@ -1385,8 +1371,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2) + ssize_t + pub_glfs_h_extract_handle(struct glfs_object *object, unsigned char *handle, + int len) +@@ -1417,8 +1402,7 @@ out: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2) + struct glfs_object * + pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len, + struct stat *stat) +@@ -1541,8 +1525,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2) + int + pub_glfs_h_close(struct glfs_object *object) + { +@@ -1555,8 
+1538,7 @@ pub_glfs_h_close(struct glfs_object *object) + return 0; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2) + int + pub_glfs_h_truncate(struct glfs *fs, struct glfs_object *object, off_t offset) + { +@@ -1616,8 +1598,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2) + struct glfs_object * + pub_glfs_h_symlink(struct glfs *fs, struct glfs_object *parent, + const char *name, const char *data, struct stat *stat) +@@ -1716,8 +1697,7 @@ invalid_fs: + return object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2) + int + pub_glfs_h_readlink(struct glfs *fs, struct glfs_object *object, char *buf, + size_t bufsiz) +@@ -1782,8 +1762,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2) + int + pub_glfs_h_link(struct glfs *fs, struct glfs_object *linksrc, + struct glfs_object *parent, const char *name) +@@ -1880,8 +1859,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2) + int + pub_glfs_h_rename(struct glfs *fs, struct glfs_object *olddir, + const char *oldname, struct glfs_object *newdir, +@@ -1991,8 +1969,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2); +- + /* + * Given a handle/gfid, find if the corresponding inode is present in + * the inode table. If yes create and return the corresponding glfs_object. +@@ -2200,6 +2176,7 @@ glfs_release_upcall(void *ptr) + * calling glfs_fini(..). Hence making an assumption that 'fs' & ctx structures + * cannot be freed while in this routine. + */ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16) + int + pub_glfs_h_poll_upcall(struct glfs *fs, struct glfs_upcall **up_arg) + { +@@ -2317,8 +2294,6 @@ err: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16); +- + static gf_boolean_t log_upcall370 = _gf_true; /* log once */ + + /* The old glfs_h_poll_upcall interface requires intimate knowledge of the +@@ -2332,6 +2307,7 @@ static gf_boolean_t log_upcall370 = _gf_true; /* log once */ + * + * WARNING: this function will be removed in the future. 
+ */ ++GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0) + int + pub_glfs_h_poll_upcall370(struct glfs *fs, struct glfs_callback_arg *up_arg) + { +@@ -2399,12 +2375,11 @@ out: + return ret; + } + +-GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0); +- + #ifdef HAVE_ACL_LIBACL_H + #include <glusterfs/glusterfs-acl.h> + #include <acl/libacl.h> + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0) + int + pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object, + const acl_type_t type, const acl_t acl) +@@ -2453,6 +2428,7 @@ invalid_fs: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0) + acl_t + pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object, + const acl_type_t type) +@@ -2507,6 +2483,7 @@ invalid_fs: + return acl; + } + #else /* !HAVE_ACL_LIBACL_H */ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0) + acl_t + pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object, + const acl_type_t type) +@@ -2515,6 +2492,7 @@ pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object, + return NULL; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0) + int + pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object, + const acl_type_t type, const acl_t acl) +@@ -2523,10 +2501,9 @@ pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object, + return -1; + } + #endif +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0); +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0); + + /* The API to perform read using anonymous fd */ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0) + ssize_t + pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object, + const void *buf, size_t count, off_t offset) +@@ -2550,9 +2527,8 @@ pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0); +- + /* The API to perform write using anonymous fd */ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0) + ssize_t + pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object, + const void *buf, size_t count, off_t offset) +@@ -2576,8 +2552,7 @@ pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object, + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0) + struct glfs_object * + pub_glfs_object_copy(struct glfs_object *src) + { +@@ -2600,8 +2575,8 @@ pub_glfs_object_copy(struct glfs_object *src) + out: + return object; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0) + struct glfs_object * + pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat) + { +@@ -2618,8 +2593,8 @@ pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat) + out: + return NULL; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0) + int + pub_glfs_h_lease(struct glfs *fs, struct glfs_object *object, + struct glfs_lease *lease) +@@ -2681,5 +2656,3 @@ out: + invalid_fs: + return ret; + } +- +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0); +diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h +index 15cf0ee00..a6cd1ff20 100644 +--- a/api/src/glfs-internal.h ++++ b/api/src/glfs-internal.h +@@ -81,25 +81,40 @@ + #ifndef GFAPI_PRIVATE + #define GFAPI_PRIVATE(sym, ver) /**/ + #endif ++#if __GNUC__ >= 10 + #define 
GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \ +- asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver)) ++ __attribute__((__symver__(STR(fn) "@@GFAPI_" STR(ver)))) + + #define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \ +- asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver)) ++ __attribute__((__symver__(STR(fn) "@@GFAPI_PRIVATE_" STR(ver)))) + + #define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \ +- asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver)) ++ __attribute__((__symver__(STR(fn2) "@GFAPI_" STR(ver)))) + + #define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \ +- asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver)) ++ __attribute__((__symver__(STR(fn2) "@GFAPI_PRIVATE_" STR(ver)))) ++ ++#else ++#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \ ++ asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver)); ++ ++#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \ ++ asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver)); ++ ++#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \ ++ asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver)); ++ ++#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \ ++ asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver)); ++#endif + #define STR(str) #str + #else + #ifndef GFAPI_PUBLIC +-#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver)) ++#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver)); + #endif + #ifndef GFAPI_PRIVATE + #define GFAPI_PRIVATE(sym, ver) \ +- __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver)) ++ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver)); + #endif + #define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, dotver) /**/ + #define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, dotver) /**/ +diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c +index 7476d5b64..07be8a403 100644 +--- a/api/src/glfs-mgmt.c ++++ b/api/src/glfs-mgmt.c +@@ -383,6 +383,7 @@ out: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0) + int + pub_glfs_get_volumeid(struct glfs *fs, char *volid, size_t size) + { +@@ -439,8 +440,6 @@ invalid_fs: + return -1; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0); +- + int + glfs_get_volume_info(struct glfs *fs) + { +diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c +index 58b6ace58..c3e114a39 100644 +--- a/api/src/glfs-resolve.c ++++ b/api/src/glfs-resolve.c +@@ -163,6 +163,7 @@ __glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode, + return newinode; + } + ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0) + int + priv_glfs_loc_touchup(loc_t *loc) + { +@@ -177,8 +178,6 @@ priv_glfs_loc_touchup(loc_t *loc) + return ret; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0); +- + int + glfs_resolve_symlink(struct glfs *fs, xlator_t *subvol, inode_t *inode, + char **lpath) +@@ -466,6 +465,7 @@ out: + return inode; + } + ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0) + int + priv_glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at, + const char *origpath, loc_t *loc, struct iatt *iatt, +@@ -616,8 +616,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0); +- + int + glfs_resolve_path(struct glfs *fs, xlator_t *subvol, const char *origpath, + loc_t *loc, struct iatt *iatt, int follow, int reval) +@@ -646,6 +644,7 @@ out: + return ret; + } + ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0) + int + priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath, + loc_t *loc, struct iatt *iatt, int 
reval) +@@ -656,7 +655,6 @@ priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath, + + return ret; + } +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0); + + int + glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *origpath, +@@ -977,6 +975,7 @@ __glfs_active_subvol(struct glfs *fs) + return new_subvol; + } + ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0) + void + priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol) + { +@@ -1004,8 +1003,7 @@ priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol) + } + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0); +- ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0) + xlator_t * + priv_glfs_active_subvol(struct glfs *fs) + { +@@ -1033,8 +1031,6 @@ priv_glfs_active_subvol(struct glfs *fs) + return subvol; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0); +- + int + __glfs_cwd_set(struct glfs *fs, inode_t *inode) + { +diff --git a/api/src/glfs.c b/api/src/glfs.c +index ae994faaf..b85b1c4be 100644 +--- a/api/src/glfs.c ++++ b/api/src/glfs.c +@@ -277,6 +277,7 @@ out: + + /////////////////////////////////////////////////////////////////////////////// + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0) + int + pub_glfs_set_xlator_option(struct glfs *fs, const char *xlator, const char *key, + const char *value) +@@ -326,8 +327,7 @@ invalid_fs: + return -1; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1) + int + pub_glfs_unset_volfile_server(struct glfs *fs, const char *transport, + const char *host, const int port) +@@ -385,8 +385,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0) + int + pub_glfs_set_volfile_server(struct glfs *fs, const char *transport, + const char *host, int port) +@@ -468,8 +467,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0); +- + /* * + * Used to free the arguments allocated by glfs_set_volfile_server() + */ +@@ -512,6 +509,7 @@ glfs_free_xlator_options(cmd_args_t *cmd_args) + } + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2) + int + pub_glfs_setfsuid(uid_t fsuid) + { +@@ -521,8 +519,7 @@ pub_glfs_setfsuid(uid_t fsuid) + return syncopctx_setfsuid(&fsuid); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2) + int + pub_glfs_setfsgid(gid_t fsgid) + { +@@ -532,8 +529,7 @@ pub_glfs_setfsgid(gid_t fsgid) + return syncopctx_setfsgid(&fsgid); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2) + int + pub_glfs_setfsgroups(size_t size, const gid_t *list) + { +@@ -543,8 +539,7 @@ pub_glfs_setfsgroups(size_t size, const gid_t *list) + return syncopctx_setfsgroups(size, list); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0) + int + pub_glfs_setfsleaseid(glfs_leaseid_t leaseid) + { +@@ -566,8 +561,6 @@ pub_glfs_setfsleaseid(glfs_leaseid_t leaseid) + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0); +- + int + get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd) + { +@@ -655,14 +648,14 @@ unset_fop_attr(dict_t **fop_attr) + *fop_attr = NULL; + } + } ++ ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0) + struct glfs * + pub_glfs_from_glfd(struct glfs_fd *glfd) + { + 
return glfd->fs; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0); +- + static void + glfs_fd_destroy(struct glfs_fd *glfd) + { +@@ -811,6 +804,7 @@ unlock: + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0) + struct glfs * + pub_glfs_new(const char *volname) + { +@@ -899,8 +893,7 @@ out: + return fs; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0); +- ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0) + struct glfs * + priv_glfs_new_from_ctx(glusterfs_ctx_t *ctx) + { +@@ -919,8 +912,7 @@ out: + return fs; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0); +- ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0) + void + priv_glfs_free_from_ctx(struct glfs *fs) + { +@@ -956,8 +948,7 @@ priv_glfs_free_from_ctx(struct glfs *fs) + FREE(fs); + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0) + int + pub_glfs_set_volfile(struct glfs *fs, const char *volfile) + { +@@ -974,8 +965,7 @@ pub_glfs_set_volfile(struct glfs *fs, const char *volfile) + return 0; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0) + int + pub_glfs_set_logging(struct glfs *fs, const char *logfile, int loglevel) + { +@@ -1013,8 +1003,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0); +- + int + glfs_init_wait(struct glfs *fs) + { +@@ -1033,6 +1021,7 @@ glfs_init_wait(struct glfs *fs) + return ret; + } + ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0) + void + priv_glfs_init_done(struct glfs *fs, int ret) + { +@@ -1064,8 +1053,6 @@ out: + return; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0); +- + int + glfs_init_common(struct glfs *fs) + { +@@ -1106,6 +1093,7 @@ glfs_init_async(struct glfs *fs, glfs_init_cbk cbk) + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0) + int + pub_glfs_init(struct glfs *fs) + { +@@ -1139,8 +1127,6 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0); +- + static int + glusterfs_ctx_destroy(glusterfs_ctx_t *ctx) + { +@@ -1218,6 +1204,7 @@ glusterfs_ctx_destroy(glusterfs_ctx_t *ctx) + return ret; + } + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0) + int + pub_glfs_fini(struct glfs *fs) + { +@@ -1412,8 +1399,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0) + ssize_t + pub_glfs_get_volfile(struct glfs *fs, void *buf, size_t len) + { +@@ -1439,8 +1425,7 @@ invalid_fs: + return res; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0); +- ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0) + int + priv_glfs_ipc(struct glfs *fs, int opcode, void *xd_in, void **xd_out) + { +@@ -1468,8 +1453,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0); +- ++GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1) + int + priv_glfs_setfspid(struct glfs *fs, pid_t pid) + { +@@ -1483,107 +1467,104 @@ priv_glfs_setfspid(struct glfs *fs, pid_t pid) + + return ret; + } +-GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16) + void + pub_glfs_free(void *ptr) + { + GLFS_FREE(ptr); + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16) + struct glfs * + pub_glfs_upcall_get_fs(struct glfs_upcall *arg) + { + return arg->fs; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16); + 
++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16) + enum glfs_upcall_reason + pub_glfs_upcall_get_reason(struct glfs_upcall *arg) + { + return arg->reason; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16) + void * + pub_glfs_upcall_get_event(struct glfs_upcall *arg) + { + return arg->event; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16) + struct glfs_object * + pub_glfs_upcall_inode_get_object(struct glfs_upcall_inode *arg) + { + return arg->object; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16) + uint64_t + pub_glfs_upcall_inode_get_flags(struct glfs_upcall_inode *arg) + { + return arg->flags; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16) + struct stat * + pub_glfs_upcall_inode_get_stat(struct glfs_upcall_inode *arg) + { + return &arg->buf; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16) + uint64_t + pub_glfs_upcall_inode_get_expire(struct glfs_upcall_inode *arg) + { + return arg->expire_time_attr; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16) + struct glfs_object * + pub_glfs_upcall_inode_get_pobject(struct glfs_upcall_inode *arg) + { + return arg->p_object; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16) + struct stat * + pub_glfs_upcall_inode_get_pstat(struct glfs_upcall_inode *arg) + { + return &arg->p_buf; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16) + struct glfs_object * + pub_glfs_upcall_inode_get_oldpobject(struct glfs_upcall_inode *arg) + { + return arg->oldp_object; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16) + struct stat * + pub_glfs_upcall_inode_get_oldpstat(struct glfs_upcall_inode *arg) + { + return &arg->oldp_buf; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16); + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6) + struct glfs_object * + pub_glfs_upcall_lease_get_object(struct glfs_upcall_lease *arg) + { + return arg->object; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6) + uint32_t + pub_glfs_upcall_lease_get_lease_type(struct glfs_upcall_lease *arg) + { + return arg->lease_type; + } +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6); + + /* definitions of the GLFS_SYSRQ_* chars are in glfs.h */ + static struct glfs_sysrq_help { +@@ -1593,6 +1574,7 @@ static struct glfs_sysrq_help { + {GLFS_SYSRQ_STATEDUMP, "(S)tatedump"}, + {0, NULL}}; + ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0) + int + pub_glfs_sysrq(struct glfs *fs, char sysrq) + { +@@ -1641,8 +1623,7 @@ out: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0) + int + pub_glfs_upcall_register(struct glfs *fs, uint32_t 
event_list, + glfs_upcall_cbk cbk, void *data) +@@ -1698,8 +1679,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0) + int + pub_glfs_upcall_unregister(struct glfs *fs, uint32_t event_list) + { +@@ -1746,8 +1726,7 @@ invalid_fs: + return ret; + } + +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0); +- ++GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 6.4) + int + pub_glfs_set_statedump_path(struct glfs *fs, const char *path) + { +@@ -1807,5 +1786,3 @@ err: + invalid_fs: + return -1; + } +- +-GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 6.4); +diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c +index 270b76f44..c44ef1087 100644 +--- a/cli/src/cli-cmd-global.c ++++ b/cli/src/cli-cmd-global.c +@@ -27,7 +27,6 @@ + #include <glusterfs/syscall.h> + #include <glusterfs/common-utils.h> + +-extern rpc_clnt_prog_t *cli_rpc_prog; + + int + cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word, +diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c +index 04dd2efc2..6eaac8b92 100644 +--- a/cli/src/cli-cmd-misc.c ++++ b/cli/src/cli-cmd-misc.c +@@ -18,10 +18,6 @@ + #include "cli-mem-types.h" + #include "protocol-common.h" + +-extern struct rpc_clnt *global_rpc; +- +-extern rpc_clnt_prog_t *cli_rpc_prog; +- + extern struct cli_cmd volume_cmds[]; + extern struct cli_cmd bitrot_cmds[]; + extern struct cli_cmd quota_cmds[]; +diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c +index e42a1139b..55deb4369 100644 +--- a/cli/src/cli-cmd-peer.c ++++ b/cli/src/cli-cmd-peer.c +@@ -20,10 +20,6 @@ + #include "protocol-common.h" + #include <glusterfs/events.h> + +-extern struct rpc_clnt *global_rpc; +- +-extern rpc_clnt_prog_t *cli_rpc_prog; +- + int + cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word, + const char **words, int wordcount); +diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c +index 814ab82f6..3c523ad17 100644 +--- a/cli/src/cli-cmd-snapshot.c ++++ b/cli/src/cli-cmd-snapshot.c +@@ -17,8 +17,6 @@ + #include "cli-cmd.h" + #include "cli-mem-types.h" + +-extern rpc_clnt_prog_t *cli_rpc_prog; +- + int + cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word, + const char **words, int wordcount); +diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c +index cb3a9ea74..ca802187a 100644 +--- a/cli/src/cli-cmd-system.c ++++ b/cli/src/cli-cmd-system.c +@@ -18,10 +18,6 @@ + #include "cli-mem-types.h" + #include "protocol-common.h" + +-extern struct rpc_clnt *global_rpc; +- +-extern rpc_clnt_prog_t *cli_rpc_prog; +- + int + cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word, + const char **words, int wordcount); +diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c +index b6bef80f1..8186ed7bc 100644 +--- a/cli/src/cli-cmd-volume.c ++++ b/cli/src/cli-cmd-volume.c +@@ -28,10 +28,6 @@ + #include <glusterfs/common-utils.h> + #include <glusterfs/events.h> + +-extern struct rpc_clnt *global_rpc; +-extern struct rpc_clnt *global_quotad_rpc; +- +-extern rpc_clnt_prog_t *cli_rpc_prog; + extern rpc_clnt_prog_t cli_quotad_clnt; + + int +diff --git a/cli/src/cli-quotad-client.c b/cli/src/cli-quotad-client.c +index 52ab97ee8..1da7b3f0a 100644 +--- a/cli/src/cli-quotad-client.c ++++ b/cli/src/cli-quotad-client.c +@@ -10,9 +10,6 @@ + + #include "cli-quotad-client.h" + +-extern struct rpc_clnt global_quotad_rpc; +-extern 
struct rpc_clnt_program cli_quotad_clnt; +- + int + cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog, + int procnum, struct iobref *iobref, xlator_t *this, +diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c +index 51b544786..ef320b019 100644 +--- a/cli/src/cli-rpc-ops.c ++++ b/cli/src/cli-rpc-ops.c +@@ -48,10 +48,7 @@ + + enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK }; + +-extern struct rpc_clnt *global_quotad_rpc; +-extern rpc_clnt_prog_t cli_quotad_clnt; +-extern rpc_clnt_prog_t *cli_rpc_prog; +-extern int cli_op_ret; ++rpc_clnt_prog_t cli_quotad_clnt; + extern int connected; + + int32_t +diff --git a/cli/src/cli.c b/cli/src/cli.c +index a76c5a263..74e055222 100644 +--- a/cli/src/cli.c ++++ b/cli/src/cli.c +@@ -74,6 +74,10 @@ rpc_clnt_prog_t *cli_rpc_prog; + + extern struct rpc_clnt_program cli_prog; + ++int cli_default_conn_timeout = 120; ++int cli_ten_minutes_timeout = 600; ++ ++ + static int + glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) + { +diff --git a/cli/src/cli.h b/cli/src/cli.h +index b5b69ea48..21982594e 100644 +--- a/cli/src/cli.h ++++ b/cli/src/cli.h +@@ -42,8 +42,8 @@ enum argp_option_keys { + ARGP_PORT_KEY = 'p', + }; + +-int cli_default_conn_timeout; +-int cli_ten_minutes_timeout; ++extern int cli_default_conn_timeout; ++extern int cli_ten_minutes_timeout; + + typedef enum { + COLD_BRICK_COUNT, +@@ -191,6 +191,12 @@ typedef ssize_t (*cli_serialize_t)(struct iovec outmsg, void *args); + + extern struct cli_state *global_state; /* use only in readline callback */ + ++extern struct rpc_clnt *global_quotad_rpc; ++ ++extern struct rpc_clnt *global_rpc; ++ ++extern rpc_clnt_prog_t *cli_rpc_prog; ++ + typedef const char *(*cli_selector_t)(void *wcon); + + char * +-- +2.27.0 + diff --git a/SOURCES/0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch b/SOURCES/0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch new file mode 100644 index 0000000..99459af --- /dev/null +++ b/SOURCES/0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch @@ -0,0 +1,250 @@ +From 04a43bb831b98ce9942ac6daa7e14ff88ecd9bee Mon Sep 17 00:00:00 2001 +From: Tamar Shacked <tshacked@redhat.com> +Date: Sun, 1 Aug 2021 09:30:18 +0300 +Subject: contrib: remove contrib/sunrpc/xdr_sizeof.c + +It's not needed, and it has a license that Fedora is not very happy with. +Just removed that file. + +Backport of: +> Upstream-patch-link: https://review.gluster.org/#/c/glusterfs/+/24761/ +> Fixes: #1383 +> Change-Id: Ia753f0058c8a7c6482aca40c3b3dc8f6aa4a266d +> Signed-off-by: Yaniv Kaul <ykaul@redhat.com> + +BUG: 1939340 + +Signed-off-by: Tamar Shacked <tshacked@redhat.com> +Change-Id: Ibbc3871e66f542f4cb0c5a6c3182792eb125d0f1 +--- + contrib/sunrpc/xdr_sizeof.c | 204 ------------------------------------ + rpc/rpc-lib/src/Makefile.am | 2 +- + 2 files changed, 1 insertion(+), 205 deletions(-) + delete mode 100644 contrib/sunrpc/xdr_sizeof.c + +diff --git a/contrib/sunrpc/xdr_sizeof.c b/contrib/sunrpc/xdr_sizeof.c +deleted file mode 100644 +index ca1f7bf0a..000000000 +--- a/contrib/sunrpc/xdr_sizeof.c ++++ /dev/null +@@ -1,204 +0,0 @@ +-/* +- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. +- * +- * @APPLE_LICENSE_HEADER_START@ +- * +- * Portions Copyright (c) 1999 Apple Computer, Inc. All Rights +- * Reserved. This file contains Original Code and/or Modifications of +- * Original Code as defined in and that are subject to the Apple Public +- * Source License Version 1.1 (the "License"). 
You may not use this file +- * except in compliance with the License. Please obtain a copy of the +- * License at http://www.apple.com/publicsource and read it before using +- * this file. +- * +- * The Original Code and all software distributed under the License are +- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT. Please see the +- * License for the specific language governing rights and limitations +- * under the License. +- * +- * @APPLE_LICENSE_HEADER_END@ +- */ +- +-/* +- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for +- * unrestricted use provided that this legend is included on all tape +- * media and as a part of the software program in whole or part. Users +- * may copy or modify Sun RPC without charge, but are not authorized +- * to license or distribute it to anyone else except as part of a product or +- * program developed by the user. +- * +- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE +- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR +- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE. +- * +- * Sun RPC is provided with no support and without any obligation on the +- * part of Sun Microsystems, Inc. to assist in its use, correction, +- * modification or enhancement. +- * +- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE +- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC +- * OR ANY PART THEREOF. +- * +- * In no event will Sun Microsystems, Inc. be liable for any lost revenue +- * or profits or other special, indirect and consequential damages, even if +- * Sun has been advised of the possibility of such damages. +- * +- * Sun Microsystems, Inc. +- * 2550 Garcia Avenue +- * Mountain View, California 94043 +- */ +- +-/* +- * xdr_sizeof.c +- * +- * Copyright 1990 Sun Microsystems, Inc. +- * +- * General purpose routine to see how much space something will use +- * when serialized using XDR. 
+- */ +- +-#ifdef GF_DARWIN_HOST_OS +- +-#include <rpc/types.h> +-#include <rpc/xdr.h> +-#include <sys/types.h> +-#include <sys/cdefs.h> +- +-#include <stdlib.h> +- +-/* ARGSUSED */ +-#ifdef GF_DARWIN_HOST_OS +-static bool_t +-x_putlong (XDR *xdrs, const int *longp) +-{ +- xdrs->x_handy += BYTES_PER_XDR_UNIT; +- return TRUE; +-} +- +-#else +-static bool_t +-x_putlong (XDR *xdrs, const long *longp) +-{ +- xdrs->x_handy += BYTES_PER_XDR_UNIT; +- return TRUE; +-} +-#endif +- +-/* ARGSUSED */ +-static bool_t +-x_putbytes (XDR *xdrs, const char *bp, u_int len) +-{ +- xdrs->x_handy += len; +- return TRUE; +-} +- +-#ifdef GF_DARWIN_HOST_OS +-static u_int +-x_getpostn (XDR *xdrs) +-{ +- return xdrs->x_handy; +-} +-#else +-static u_int +-x_getpostn (const XDR *xdrs) +-{ +- return xdrs->x_handy; +-} +-#endif +- +-/* ARGSUSED */ +-static bool_t +-x_setpostn (XDR *xdrs, u_int len) +-{ +- /* This is not allowed */ +- return FALSE; +-} +- +-static int32_t * +-x_inline (XDR *xdrs, u_int len) +-{ +- if (len == 0) +- return NULL; +- if (xdrs->x_op != XDR_ENCODE) +- return NULL; +- if (len < (u_int) (long int) xdrs->x_base) +- { +- /* x_private was already allocated */ +- xdrs->x_handy += len; +- return (int32_t *) xdrs->x_private; +- } +- else +- { +- /* Free the earlier space and allocate new area */ +- free (xdrs->x_private); +- if ((xdrs->x_private = (caddr_t) malloc (len)) == NULL) +- { +- xdrs->x_base = 0; +- return NULL; +- } +- xdrs->x_base = (void *) (long) len; +- xdrs->x_handy += len; +- return (int32_t *) xdrs->x_private; +- } +-} +- +-static int +-harmless (void) +-{ +- /* Always return FALSE/NULL, as the case may be */ +- return 0; +-} +- +-static void +-x_destroy (XDR *xdrs) +-{ +- xdrs->x_handy = 0; +- xdrs->x_base = 0; +- if (xdrs->x_private) +- { +- free (xdrs->x_private); +- xdrs->x_private = NULL; +- } +- return; +-} +- +-unsigned long +-xdr_sizeof (xdrproc_t func, void *data) +-{ +- XDR x; +- struct xdr_ops ops; +- bool_t stat; +- +-#ifdef GF_DARWIN_HOST_OS +- typedef bool_t (*dummyfunc1) (XDR *, int *); +-#else +- typedef bool_t (*dummyfunc1) (XDR *, long *); +-#endif +- typedef bool_t (*dummyfunc2) (XDR *, caddr_t, u_int); +- +- ops.x_putlong = x_putlong; +- ops.x_putbytes = x_putbytes; +- ops.x_inline = x_inline; +- ops.x_getpostn = x_getpostn; +- ops.x_setpostn = x_setpostn; +- ops.x_destroy = x_destroy; +- +- /* the other harmless ones */ +- ops.x_getlong = (dummyfunc1) harmless; +- ops.x_getbytes = (dummyfunc2) harmless; +- +- x.x_op = XDR_ENCODE; +- x.x_ops = &ops; +- x.x_handy = 0; +- x.x_private = (caddr_t) NULL; +- x.x_base = (caddr_t) 0; +- +- stat = func (&x, data, 0); +- if (x.x_private) +- free (x.x_private); +- return (stat == TRUE ? 
(unsigned) x.x_handy : 0); +-} +-#endif /* GF_DARWIN_HOST_OS */ +diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am +index 81a964768..35c9db07e 100644 +--- a/rpc/rpc-lib/src/Makefile.am ++++ b/rpc/rpc-lib/src/Makefile.am +@@ -2,7 +2,7 @@ lib_LTLIBRARIES = libgfrpc.la + + libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \ + rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \ +- rpc-drc.c $(CONTRIBDIR)/sunrpc/xdr_sizeof.c rpc-clnt-ping.c \ ++ rpc-drc.c rpc-clnt-ping.c \ + autoscale-threads.c mgmt-pmap.c + + EXTRA_DIST = libgfrpc.sym +-- +2.27.0 + diff --git a/SPECS/glusterfs.spec b/SPECS/glusterfs.spec new file mode 100644 index 0000000..3e63faa --- /dev/null +++ b/SPECS/glusterfs.spec @@ -0,0 +1,2823 @@ +%global _hardened_build 1 + +%global _for_fedora_koji_builds 0 + +# uncomment and add '%' to use the prereltag for pre-releases +# %%global prereltag qa3 + +##----------------------------------------------------------------------------- +## All argument definitions should be placed here and keep them sorted +## + +# asan +# if you wish to compile an rpm with address sanitizer... +# rpmbuild -ta glusterfs-6.0.tar.gz --with asan +%{?_with_asan:%global _with_asan --enable-asan} + +%if ( 0%{?rhel} && 0%{?rhel} < 7 ) +%global _with_asan %{nil} +%endif + +# bd +# if you wish to compile an rpm without the BD map support... +# rpmbuild -ta glusterfs-6.0.tar.gz --without bd +%{?_without_bd:%global _without_bd --disable-bd-xlator} + +%if ( 0%{?rhel} && 0%{?rhel} > 7 ) +%global _without_bd --without-bd +%endif + +# cmocka +# if you wish to compile an rpm with cmocka unit testing... +# rpmbuild -ta glusterfs-6.0.tar.gz --with cmocka +%{?_with_cmocka:%global _with_cmocka --enable-cmocka} + +# debug +# if you wish to compile an rpm with debugging... +# rpmbuild -ta glusterfs-6.0.tar.gz --with debug +%{?_with_debug:%global _with_debug --enable-debug} + +# epoll +# if you wish to compile an rpm without epoll... +# rpmbuild -ta glusterfs-6.0.tar.gz --without epoll +%{?_without_epoll:%global _without_epoll --disable-epoll} + +# fusermount +# if you wish to compile an rpm without fusermount... +# rpmbuild -ta glusterfs-6.0.tar.gz --without fusermount +%{?_without_fusermount:%global _without_fusermount --disable-fusermount} + +# geo-rep +# if you wish to compile an rpm without geo-replication support, compile like this... +# rpmbuild -ta glusterfs-6.0.tar.gz --without georeplication +%{?_without_georeplication:%global _without_georeplication --disable-georeplication} + +# ipv6default +# if you wish to compile an rpm with IPv6 default... +# rpmbuild -ta glusterfs-6.0.tar.gz --with ipv6default +%{?_with_ipv6default:%global _with_ipv6default --with-ipv6-default} + +# libtirpc +# if you wish to compile an rpm without TIRPC (i.e. use legacy glibc rpc) +# rpmbuild -ta glusterfs-6.0.tar.gz --without libtirpc +%{?_without_libtirpc:%global _without_libtirpc --without-libtirpc} + +# Do not use libtirpc on EL6, it does not have xdr_uint64_t() and xdr_uint32_t +# Do not use libtirpc on EL7, it does not have xdr_sizeof() +%if ( 0%{?rhel} && 0%{?rhel} <= 7 ) +%global _without_libtirpc --without-libtirpc +%endif + + +# ocf +# if you wish to compile an rpm without the OCF resource agents... +# rpmbuild -ta glusterfs-6.0.tar.gz --without ocf +%{?_without_ocf:%global _without_ocf --without-ocf} + +# rdma +# if you wish to compile an rpm without rdma support, compile like this... 
+# rpmbuild -ta glusterfs-6.0.tar.gz --without rdma +%{?_without_rdma:%global _without_rdma --disable-ibverbs} + +# No RDMA Support on 32-bit ARM +%ifarch armv7hl +%global _without_rdma --disable-ibverbs +%endif + +# server +# if you wish to build rpms without server components, compile like this +# rpmbuild -ta glusterfs-6.0.tar.gz --without server +%{?_without_server:%global _without_server --without-server} + +# disable server components forcefully as rhel <= 6 +%if ( 0%{?rhel} ) +%if (!(( "%{?dist}" == ".el6rhs" ) || ( "%{?dist}" == ".el7rhs" ) || ( "%{?dist}" == ".el7rhgs" ) || ( "%{?dist}" == ".el8rhgs" ))) +%global _without_server --without-server +%endif +%endif + +%global _without_extra_xlators 1 +%global _without_regression_tests 1 + +# syslog +# if you wish to build rpms without syslog logging, compile like this +# rpmbuild -ta glusterfs-6.0.tar.gz --without syslog +%{?_without_syslog:%global _without_syslog --disable-syslog} + +# disable syslog forcefully as rhel <= 6 doesn't have rsyslog or rsyslog-mmcount +# Fedora deprecated syslog, see +# https://fedoraproject.org/wiki/Changes/NoDefaultSyslog +# (And what about RHEL7?) +%if ( 0%{?fedora} && 0%{?fedora} >= 20 ) || ( 0%{?rhel} && 0%{?rhel} <= 6 ) +%global _without_syslog --disable-syslog +%endif + +# tsan +# if you wish to compile an rpm with thread sanitizer... +# rpmbuild -ta glusterfs-6.0.tar.gz --with tsan +%{?_with_tsan:%global _with_tsan --enable-tsan} + +%if ( 0%{?rhel} && 0%{?rhel} < 7 ) +%global _with_tsan %{nil} +%endif + +# valgrind +# if you wish to compile an rpm to run all processes under valgrind... +# rpmbuild -ta glusterfs-6.0.tar.gz --with valgrind +%{?_with_valgrind:%global _with_valgrind --enable-valgrind} + +##----------------------------------------------------------------------------- +## All %%global definitions should be placed here and keep them sorted +## + +# selinux booleans whose default value needs modification +# these booleans will be consumed by "%%selinux_set_booleans" macro.
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 ) +%global selinuxbooleans rsync_full_access=1 rsync_client=1 +%endif + +%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) +%global _with_systemd true +%endif + +%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 7 ) +%global _with_firewalld --enable-firewalld +%endif + +%if 0%{?_tmpfilesdir:1} +%global _with_tmpfilesdir --with-tmpfilesdir=%{_tmpfilesdir} +%else +%global _with_tmpfilesdir --without-tmpfilesdir +%endif + +# without server should also disable some server-only components +%if 0%{?_without_server:1} +%global _without_events --disable-events +%global _without_georeplication --disable-georeplication +%global _without_tiering --disable-tiering +%global _without_ocf --without-ocf +%endif + +%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 7 ) +%global _usepython3 1 +%global _pythonver 3 +%else +%global _usepython3 0 +%global _pythonver 2 +%endif + +# From https://fedoraproject.org/wiki/Packaging:Python#Macros +%if ( 0%{?rhel} && 0%{?rhel} <= 6 ) +%{!?python2_sitelib: %global python2_sitelib %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")} +%{!?python2_sitearch: %global python2_sitearch %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")} +%global _rundir %{_localstatedir}/run +%endif + +%if ( 0%{?_with_systemd:1} ) +%global service_enable() /bin/systemctl --quiet enable %1.service || : \ +%{nil} +%global service_start() /bin/systemctl --quiet start %1.service || : \ +%{nil} +%global service_stop() /bin/systemctl --quiet stop %1.service || :\ +%{nil} +%global service_install() install -D -p -m 0644 %1.service %{buildroot}%2 \ +%{nil} +# can't seem to make a generic macro that works +%global glusterd_svcfile %{_unitdir}/glusterd.service +%global glusterfsd_svcfile %{_unitdir}/glusterfsd.service +%global glusterta_svcfile %{_unitdir}/gluster-ta-volume.service +%global glustereventsd_svcfile %{_unitdir}/glustereventsd.service +%global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service +%else +%global service_enable() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \ +%{nil} +%global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \ +%{nil} +%global systemd_postun_with_restart() /sbin/service %1 condrestart >/dev/null 2>&1 || : \ +%{nil} +%global service_start() /sbin/service %1 start >/dev/null 2>&1 || : \ +%{nil} +%global service_stop() /sbin/service %1 stop >/dev/null 2>&1 || : \ +%{nil} +%global service_install() install -D -p -m 0755 %1.init %{buildroot}%2 \ +%{nil} +# can't seem to make a generic macro that works +%global glusterd_svcfile %{_sysconfdir}/init.d/glusterd +%global glusterfsd_svcfile %{_sysconfdir}/init.d/glusterfsd +%global glustereventsd_svcfile %{_sysconfdir}/init.d/glustereventsd +%endif + +%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}} + +# We do not want to generate useless provides and requires for xlator +# .so files to be set for glusterfs packages. 
+
+# We do not want useless provides and requires for the xlator .so files
+# to be generated for the glusterfs packages.
+# Filter all generated:
+#
+# TODO: RHEL5 does not have a convenient solution
+%if ( 0%{?rhel} == 6 )
+# filter_setup exists in RHEL6 only
+%filter_provides_in %{_libdir}/glusterfs/%{version}/
+%global __filter_from_req %{?__filter_from_req} | grep -v -P '^(?!lib).*\.so.*$'
+%filter_setup
+%else
+# modern rpm and current Fedora do not generate requires when the
+# provides are filtered
+%global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
+%endif
+
+
+##-----------------------------------------------------------------------------
+## All package definitions should be placed here in alphabetical order
+##
+Summary: Distributed File System
+%if ( 0%{_for_fedora_koji_builds} )
+Name: glusterfs
+Version: 3.8.0
+Release: 0.2%{?prereltag:.%{prereltag}}%{?dist}
+%else
+Name: glusterfs
+Version: 6.0
+Release: 57.4%{?dist}
+ExcludeArch: i686
+%endif
+License: GPLv2 or LGPLv3+
+URL: http://docs.gluster.org/
+%if ( 0%{_for_fedora_koji_builds} )
+Source0: http://bits.gluster.org/pub/gluster/glusterfs/src/glusterfs-%{version}%{?prereltag}.tar.gz
+Source1: glusterd.sysconfig
+Source2: glusterfsd.sysconfig
+Source7: glusterfsd.service
+Source8: glusterfsd.init
+%else
+Source0: glusterfs-6.0.tar.gz
+%endif
+
+Requires(pre): shadow-utils
+%if ( 0%{?_with_systemd:1} )
+BuildRequires: systemd
+%endif
+
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
+%if 0%{?_with_asan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libasan
+%endif
+%if 0%{?_with_tsan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libtsan
+%endif
+BuildRequires: git
+BuildRequires: bison flex
+BuildRequires: gcc make libtool
+BuildRequires: ncurses-devel readline-devel
+BuildRequires: libxml2-devel openssl-devel
+BuildRequires: libaio-devel libacl-devel
+BuildRequires: python%{_pythonver}-devel
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+BuildRequires: python-ctypes
+%endif
+%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} ) || ( 0%{?rhel} && ( 0%{?rhel} >= 8 ) )
+BuildRequires: libtirpc-devel
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+BuildRequires: rpcgen
+%endif
+BuildRequires: userspace-rcu-devel >= 0.7
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+BuildRequires: automake
+%endif
+BuildRequires: libuuid-devel
+%if ( 0%{?_with_cmocka:1} )
+BuildRequires: libcmocka-devel >= 1.0.1
+%endif
+%if ( 0%{!?_without_tiering:1} )
+BuildRequires: sqlite-devel
+%endif
+%if ( 0%{!?_without_georeplication:1} )
+BuildRequires: libattr-devel
+%endif
+
+%if (0%{?_with_firewalld:1})
+BuildRequires: firewalld
+%endif
+
+Obsoletes: hekafs
+Obsoletes: %{name}-common < %{version}-%{release}
+Obsoletes: %{name}-core < %{version}-%{release}
+Obsoletes: %{name}-ufo
+%if ( 0%{!?_with_gnfs:1} )
+Obsoletes: %{name}-gnfs
+%endif
+%if ( 0%{?rhel} < 7 )
+Obsoletes: %{name}-ganesha
+%endif
+Provides: %{name}-common = %{version}-%{release}
+Provides: %{name}-core = %{version}-%{release}
+
+# Patch0001: 0001-Update-rfc.sh-to-rhgs-3.5.0.patch
+Patch0002: 0002-glusterd-fix-op-versions-for-RHS-backwards-compatabi.patch
+Patch0003: 0003-rpc-set-bind-insecure-to-off-by-default.patch
+Patch0004: 0004-glusterd-spec-fixing-autogen-issue.patch
+Patch0005: 0005-libglusterfs-glusterd-Fix-compilation-errors.patch
+Patch0006: 0006-build-remove-ghost-directory-entries.patch
+Patch0007: 0007-build-add-RHGS-specific-changes.patch
+Patch0008: 0008-secalert-remove-setuid-bit-for-fusermount-glusterfs.patch
+Patch0009: 
0009-build-introduce-security-hardening-flags-in-gluster.patch +Patch0010: 0010-spec-fix-add-pre-transaction-scripts-for-geo-rep-and.patch +Patch0011: 0011-rpm-glusterfs-devel-for-client-builds-should-not-dep.patch +Patch0012: 0012-build-add-pretrans-check.patch +Patch0013: 0013-glusterd-fix-info-file-checksum-mismatch-during-upgr.patch +Patch0014: 0014-build-spec-file-conflict-resolution.patch +Patch0015: 0015-build-randomize-temp-file-names-in-pretrans-scriptle.patch +Patch0016: 0016-glusterd-parallel-readdir-Change-the-op-version-of-p.patch +Patch0017: 0017-glusterd-Revert-op-version-for-cluster.max-brick-per.patch +Patch0018: 0018-cli-Add-message-for-user-before-modifying-brick-mult.patch +Patch0019: 0019-build-launch-glusterd-upgrade-after-all-new-bits-are.patch +Patch0020: 0020-spec-unpackaged-files-found-for-RHEL-7-client-build.patch +Patch0021: 0021-cli-glusterfsd-remove-copyright-information.patch +Patch0022: 0022-cli-Remove-upstream-doc-reference.patch +Patch0023: 0023-hooks-remove-selinux-hooks.patch +Patch0024: 0024-glusterd-Make-localtime-logging-option-invisible-in-.patch +Patch0025: 0025-build-make-RHGS-version-available-for-server.patch +Patch0026: 0026-glusterd-Introduce-daemon-log-level-cluster-wide-opt.patch +Patch0027: 0027-glusterd-change-op-version-of-fips-mode-rchecksum.patch +Patch0028: 0028-glusterd-Reset-op-version-for-features.shard-deletio.patch +Patch0029: 0029-glusterd-Reset-op-version-for-features.shard-lru-lim.patch +Patch0030: 0030-selinux-glusterd-add-features.selinux-to-glusterd-vo.patch +Patch0031: 0031-glusterd-turn-off-selinux-feature-in-downstream.patch +Patch0032: 0032-glusterd-update-gd-op-version-to-3_7_0.patch +Patch0033: 0033-build-add-missing-explicit-package-dependencies.patch +Patch0034: 0034-glusterd-introduce-a-new-op-version-for-rhgs-3.4.3.patch +Patch0035: 0035-glusterd-tag-rebalance-mgmt_v3-command-to-op-version.patch +Patch0036: 0036-build-add-conditional-dependency-on-server-for-devel.patch +Patch0037: 0037-cli-change-the-warning-message.patch +Patch0038: 0038-spec-avoid-creation-of-temp-file-in-lua-script.patch +Patch0039: 0039-cli-fix-query-to-user-during-brick-mux-selection.patch +Patch0040: 0040-build-Remove-unsupported-test-cases-failing-consiste.patch +Patch0041: 0041-tests-geo-rep-Build-failed-in-Jenkins-for-test-bug-1.patch +Patch0042: 0042-spec-client-server-Builds-are-failing-on-rhel-6.patch +Patch0043: 0043-inode-don-t-dump-the-whole-table-to-CLI.patch +Patch0044: 0044-cluster-ec-Don-t-enqueue-an-entry-if-it-is-already-h.patch +Patch0045: 0045-glusterd-fix-txn-id-mem-leak.patch +Patch0046: 0046-protocol-client-Do-not-fallback-to-anon-fd-if-fd-is-.patch +Patch0047: 0047-client-rpc-Fix-the-payload-being-sent-on-the-wire.patch +Patch0048: 0048-gfapi-Unblock-epoll-thread-for-upcall-processing.patch +Patch0049: 0049-transport-socket-log-shutdown-msg-occasionally.patch +Patch0050: 0050-geo-rep-Fix-syncing-multiple-rename-of-symlink.patch +Patch0051: 0051-spec-update-rpm-install-condition.patch +Patch0052: 0052-geo-rep-IPv6-support.patch +Patch0053: 0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch +Patch0054: 0054-Revert-glusterd-storhaug-remove-ganesha.patch +Patch0055: 0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch +Patch0056: 0056-common-ha-fixes-for-Debian-based-systems.patch +Patch0057: 0057-ganesha-scripts-Remove-export-entries-from-ganesha.c.patch +Patch0058: 0058-glusterd-ganesha-During-volume-delete-remove-the-gan.patch +Patch0059: 
0059-glusterd-ganesha-throw-proper-error-for-gluster-nfs-.patch +Patch0060: 0060-ganesha-scripts-Stop-ganesha-process-on-all-nodes-if.patch +Patch0061: 0061-ganesha-allow-refresh-config-and-volume-export-unexp.patch +Patch0062: 0062-glusterd-ganesha-perform-removal-of-ganesha.conf-on-.patch +Patch0063: 0063-glusterd-ganesha-update-cache-invalidation-properly-.patch +Patch0064: 0064-glusterd-ganesha-return-proper-value-in-pre_setup.patch +Patch0065: 0065-ganesha-scripts-remove-dependency-over-export-config.patch +Patch0066: 0066-glusterd-ganesha-add-proper-NULL-check-in-manage_exp.patch +Patch0067: 0067-ganesha-minor-improvments-for-commit-e91cdf4-17081.patch +Patch0068: 0068-common-ha-surviving-ganesha.nfsd-not-put-in-grace-on.patch +Patch0069: 0069-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch +Patch0070: 0070-packaging-glusterfs-ganesha-update-sometimes-fails-s.patch +Patch0071: 0071-common-ha-enable-and-disable-selinux-gluster_use_exe.patch +Patch0072: 0072-ganesha-ha-don-t-set-SELinux-booleans-if-SELinux-is-.patch +Patch0073: 0073-build-remove-ganesha-dependency-on-selinux-policy.patch +Patch0074: 0074-common-ha-enable-pacemaker-at-end-of-setup.patch +Patch0075: 0075-common-ha-Fix-an-incorrect-syntax-during-setup.patch +Patch0076: 0076-glusterd-ganesha-change-voltype-for-ganesha.enable-i.patch +Patch0077: 0077-glusterd-ganesha-create-remove-export-file-only-from.patch +Patch0078: 0078-common-ha-scripts-pass-the-list-of-servers-properly-.patch +Patch0079: 0079-common-ha-All-statd-related-files-need-to-be-owned-b.patch +Patch0080: 0080-glusterd-ganesha-Skip-non-ganesha-nodes-properly-for.patch +Patch0081: 0081-ganesha-ha-ensure-pacemaker-is-enabled-after-setup.patch +Patch0082: 0082-build-Add-dependency-on-netstat-for-glusterfs-ganesh.patch +Patch0083: 0083-common-ha-enable-and-disable-selinux-ganesha_use_fus.patch +Patch0084: 0084-glusterd-Fix-duplicate-client_op_version-in-info-fil.patch +Patch0085: 0085-Revert-all-remove-code-which-is-not-being-considered.patch +Patch0086: 0086-Revert-tiering-remove-the-translator-from-build-and-.patch +Patch0087: 0087-ganesha-fixing-minor-issues-after-the-backport-from-.patch +Patch0088: 0088-tier-fix-failures-noticed-during-tier-start-and-tier.patch +Patch0089: 0089-glusterd-gNFS-On-post-upgrade-to-3.2-disable-gNFS-fo.patch +Patch0090: 0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch +Patch0091: 0091-glusterd-gNFS-explicitly-set-nfs.disable-to-off-afte.patch +Patch0092: 0092-logging-Fix-GF_LOG_OCCASSIONALLY-API.patch +Patch0093: 0093-glusterd-Change-op-version-of-cache-invalidation-in-.patch +Patch0094: 0094-glusterd-load-ctime-in-the-client-graph-only-if-it-s.patch +Patch0095: 0095-cluster-afr-Remove-local-from-owners_list-on-failure.patch +Patch0096: 0096-core-Brick-is-not-able-to-detach-successfully-in-bri.patch +Patch0097: 0097-glusterd-tier-while-doing-an-attach-tier-the-self-he.patch +Patch0098: 0098-mgmt-shd-Implement-multiplexing-in-self-heal-daemon.patch +Patch0099: 0099-client-fini-return-fini-after-rpc-cleanup.patch +Patch0100: 0100-clnt-rpc-ref-leak-during-disconnect.patch +Patch0101: 0101-shd-mux-Fix-coverity-issues-introduced-by-shd-mux-pa.patch +Patch0102: 0102-rpc-transport-Missing-a-ref-on-dict-while-creating-t.patch +Patch0103: 0103-dht-NULL-check-before-setting-error-flag.patch +Patch0104: 0104-afr-shd-Cleanup-self-heal-daemon-resources-during-af.patch +Patch0105: 0105-core-Log-level-changes-do-not-effect-on-running-clie.patch +Patch0106: 0106-libgfchangelog-use-find_library-to-locate-shared-lib.patch 
+Patch0107: 0107-gfapi-add-function-to-set-client-pid.patch +Patch0108: 0108-afr-add-client-pid-to-all-gf_event-calls.patch +Patch0109: 0109-glusterd-Optimize-glusterd-handshaking-code-path.patch +Patch0110: 0110-tier-shd-glusterd-with-shd-mux-the-shd-volfile-path-.patch +Patch0111: 0111-glusterd-fix-loading-ctime-in-client-graph-logic.patch +Patch0112: 0112-geo-rep-fix-incorrectly-formatted-authorized_keys.patch +Patch0113: 0113-spec-Glusterd-did-not-start-by-default-after-node-re.patch +Patch0114: 0114-core-fix-hang-issue-in-__gf_free.patch +Patch0115: 0115-core-only-log-seek-errors-if-SEEK_HOLE-SEEK_DATA-is-.patch +Patch0116: 0116-cluster-ec-fix-fd-reopen.patch +Patch0117: 0117-spec-Remove-thin-arbiter-package.patch +Patch0118: 0118-tests-mark-thin-arbiter-test-ta.t-as-bad.patch +Patch0119: 0119-glusterd-provide-a-way-to-detach-failed-node.patch +Patch0120: 0120-glusterd-shd-Keep-a-ref-on-volinfo-until-attach-rpc-.patch +Patch0121: 0121-spec-glusterfs-devel-for-client-build-should-not-dep.patch +Patch0122: 0122-posix-ctime-Fix-stat-time-attributes-inconsistency-d.patch +Patch0123: 0123-ctime-Fix-log-repeated-logging-during-open.patch +Patch0124: 0124-spec-remove-duplicate-references-to-files.patch +Patch0125: 0125-glusterd-define-dumpops-in-the-xlator_api-of-gluster.patch +Patch0126: 0126-cluster-dht-refactor-dht-lookup-functions.patch +Patch0127: 0127-cluster-dht-Refactor-dht-lookup-functions.patch +Patch0128: 0128-glusterd-Fix-bulkvoldict-thread-logic-in-brick-multi.patch +Patch0129: 0129-core-handle-memory-accounting-correctly.patch +Patch0130: 0130-tier-test-new-tier-cmds.t-fails-after-a-glusterd-res.patch +Patch0131: 0131-tests-dht-Test-that-lookups-are-sent-post-brick-up.patch +Patch0132: 0132-glusterd-remove-duplicate-occurrence-of-features.sel.patch +Patch0133: 0133-glusterd-enable-fips-mode-rchecksum-for-new-volumes.patch +Patch0134: 0134-performance-write-behind-remove-request-from-wip-lis.patch +Patch0135: 0135-geo-rep-fix-incorrectly-formatted-authorized_keys.patch +Patch0136: 0136-glusterd-fix-inconsistent-global-option-output-in-vo.patch +Patch0137: 0137-shd-glusterd-Serialize-shd-manager-to-prevent-race-c.patch +Patch0138: 0138-glusterd-Add-gluster-volume-stop-operation-to-gluste.patch +Patch0139: 0139-ec-shd-Cleanup-self-heal-daemon-resources-during-ec-.patch +Patch0140: 0140-cluster-ec-Reopen-shouldn-t-happen-with-O_TRUNC.patch +Patch0141: 0141-socket-ssl-fix-crl-handling.patch +Patch0142: 0142-lock-check-null-value-of-dict-to-avoid-log-flooding.patch +Patch0143: 0143-packaging-Change-the-dependency-on-nfs-ganesha-to-2..patch +Patch0144: 0144-cluster-ec-honor-contention-notifications-for-partia.patch +Patch0145: 0145-core-Capture-process-memory-usage-at-the-time-of-cal.patch +Patch0146: 0146-dht-Custom-xattrs-are-not-healed-in-case-of-add-bric.patch +Patch0147: 0147-glusterd-bulkvoldict-thread-is-not-handling-all-volu.patch +Patch0148: 0148-cluster-dht-Lookup-all-files-when-processing-directo.patch +Patch0149: 0149-glusterd-Optimize-code-to-copy-dictionary-in-handsha.patch +Patch0150: 0150-libglusterfs-define-macros-needed-for-cloudsync.patch +Patch0151: 0151-mgmt-glusterd-Make-changes-related-to-cloudsync-xlat.patch +Patch0152: 0152-storage-posix-changes-with-respect-to-cloudsync.patch +Patch0153: 0153-features-cloudsync-Added-some-new-functions.patch +Patch0154: 0154-cloudsync-cvlt-Cloudsync-plugin-for-commvault-store.patch +Patch0155: 0155-cloudsync-Make-readdirp-return-stat-info-of-all-the-.patch +Patch0156: 0156-cloudsync-Fix-bug-in-cloudsync-fops-c.py.patch 
+Patch0157: 0157-afr-frame-Destroy-frame-after-afr_selfheal_entry_gra.patch +Patch0158: 0158-glusterfsd-cleanup-Protect-graph-object-under-a-lock.patch +Patch0159: 0159-glusterd-add-an-op-version-check.patch +Patch0160: 0160-geo-rep-Geo-rep-help-text-issue.patch +Patch0161: 0161-geo-rep-Fix-rename-with-existing-destination-with-sa.patch +Patch0162: 0162-geo-rep-Fix-sync-method-config.patch +Patch0163: 0163-geo-rep-Fix-sync-hang-with-tarssh.patch +Patch0164: 0164-cluster-ec-Fix-handling-of-heal-info-cases-without-l.patch +Patch0165: 0165-tests-shd-Add-test-coverage-for-shd-mux.patch +Patch0166: 0166-glusterd-svc-glusterd_svcs_stop-should-call-individu.patch +Patch0167: 0167-glusterd-shd-Optimize-the-glustershd-manager-to-send.patch +Patch0168: 0168-cluster-dht-Fix-directory-perms-during-selfheal.patch +Patch0169: 0169-Build-Fix-spec-to-enable-rhel8-client-build.patch +Patch0170: 0170-geo-rep-Convert-gfid-conflict-resolutiong-logs-into-.patch +Patch0171: 0171-posix-add-storage.reserve-size-option.patch +Patch0172: 0172-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch +Patch0173: 0173-glusterd-store-fips-mode-rchecksum-option-in-the-inf.patch +Patch0174: 0174-xlator-log-Add-more-logging-in-xlator_is_cleanup_sta.patch +Patch0175: 0175-ec-fini-Fix-race-between-xlator-cleanup-and-on-going.patch +Patch0176: 0176-features-shard-Fix-crash-during-background-shard-del.patch +Patch0177: 0177-features-shard-Fix-extra-unref-when-inode-object-is-.patch +Patch0178: 0178-Cluster-afr-Don-t-treat-all-bricks-having-metadata-p.patch +Patch0179: 0179-tests-Fix-split-brain-favorite-child-policy.t-failur.patch +Patch0180: 0180-ganesha-scripts-Make-generate-epoch.py-python3-compa.patch +Patch0181: 0181-afr-log-before-attempting-data-self-heal.patch +Patch0182: 0182-geo-rep-fix-mountbroker-setup.patch +Patch0183: 0183-glusterd-svc-Stop-stale-process-using-the-glusterd_p.patch +Patch0184: 0184-tests-Add-gating-configuration-file-for-rhel8.patch +Patch0185: 0185-gfapi-provide-an-api-for-setting-statedump-path.patch +Patch0186: 0186-cli-Remove-brick-warning-seems-unnecessary.patch +Patch0187: 0187-gfapi-statedump_path-add-proper-version-number.patch +Patch0188: 0188-features-shard-Fix-integer-overflow-in-block-count-a.patch +Patch0189: 0189-features-shard-Fix-block-count-accounting-upon-trunc.patch +Patch0190: 0190-Build-removing-the-hardcoded-usage-of-python3.patch +Patch0191: 0191-Build-Update-python-shebangs-based-on-version.patch +Patch0192: 0192-build-Ensure-gluster-cli-package-is-built-as-part-of.patch +Patch0193: 0193-spec-fixed-python-dependency-for-rhel6.patch +Patch0194: 0194-stack-Make-sure-to-have-unique-call-stacks-in-all-ca.patch +Patch0195: 0195-build-package-glusterfs-ganesha-for-rhel7-and-above.patch +Patch0196: 0196-posix-ctime-Fix-ctime-upgrade-issue.patch +Patch0197: 0197-posix-fix-crash-in-posix_cs_set_state.patch +Patch0198: 0198-cluster-ec-Prevent-double-pre-op-xattrops.patch +Patch0199: 0199-upcall-Avoid-sending-notifications-for-invalid-inode.patch +Patch0200: 0200-gfapi-fix-incorrect-initialization-of-upcall-syncop-.patch +Patch0201: 0201-geo-rep-Fix-permissions-for-GEOREP_DIR-in-non-root-s.patch +Patch0202: 0202-shd-mux-Fix-race-between-mux_proc-unlink-and-stop.patch +Patch0203: 0203-glusterd-shd-Change-shd-logfile-to-a-unique-name.patch +Patch0204: 0204-glusterd-conditionally-clear-txn_opinfo-in-stage-op.patch +Patch0205: 0205-glusterd-Can-t-run-rebalance-due-to-long-unix-socket.patch +Patch0206: 0206-glusterd-ignore-user.-options-from-compatibility-che.patch +Patch0207: 
0207-glusterd-fix-use-after-free-of-a-dict_t.patch +Patch0208: 0208-mem-pool-remove-dead-code.patch +Patch0209: 0209-core-avoid-dynamic-TLS-allocation-when-possible.patch +Patch0210: 0210-mem-pool.-c-h-minor-changes.patch +Patch0211: 0211-libglusterfs-Fix-compilation-when-disable-mempool-is.patch +Patch0212: 0212-core-fix-memory-allocation-issues.patch +Patch0213: 0213-cluster-dht-Strip-out-dht-xattrs.patch +Patch0214: 0214-geo-rep-Upgrading-config-file-to-new-version.patch +Patch0215: 0215-posix-modify-storage.reserve-option-to-take-size-and.patch +Patch0216: 0216-Test-case-fixe-for-downstream-3.5.0.patch +Patch0217: 0217-uss-Fix-tar-issue-with-ctime-and-uss-enabled.patch +Patch0218: 0218-graph-shd-Use-glusterfs_graph_deactivate-to-free-the.patch +Patch0219: 0219-posix-add-posix_set_ctime-in-posix_ftruncate.patch +Patch0220: 0220-graph-shd-Use-top-down-approach-while-cleaning-xlato.patch +Patch0221: 0221-protocol-client-propagte-GF_EVENT_CHILD_PING-only-fo.patch +Patch0222: 0222-cluster-dht-Fixed-a-memleak-in-dht_rename_cbk.patch +Patch0223: 0223-change-get_real_filename-implementation-to-use-ENOAT.patch +Patch0224: 0224-core-replace-inet_addr-with-inet_pton.patch +Patch0225: 0225-tests-utils-Fix-py2-py3-util-python-scripts.patch +Patch0226: 0226-geo-rep-fix-gluster-command-path-for-non-root-sessio.patch +Patch0227: 0227-glusterd-svc-update-pid-of-mux-volumes-from-the-shd-.patch +Patch0228: 0228-locks-enable-notify-contention-by-default.patch +Patch0229: 0229-glusterd-Show-the-correct-brick-status-in-get-state.patch +Patch0230: 0230-Revert-glusterd-svc-update-pid-of-mux-volumes-from-t.patch +Patch0231: 0231-Revert-graph-shd-Use-top-down-approach-while-cleanin.patch +Patch0232: 0232-cluster-afr-Fix-incorrect-reporting-of-gfid-type-mis.patch +Patch0233: 0233-Revert-graph-shd-Use-glusterfs_graph_deactivate-to-f.patch +Patch0234: 0234-Revert-glusterd-shd-Change-shd-logfile-to-a-unique-n.patch +Patch0235: 0235-Revert-glusterd-svc-Stop-stale-process-using-the-glu.patch +Patch0236: 0236-Revert-shd-mux-Fix-race-between-mux_proc-unlink-and-.patch +Patch0237: 0237-Revert-ec-fini-Fix-race-between-xlator-cleanup-and-o.patch +Patch0238: 0238-Revert-xlator-log-Add-more-logging-in-xlator_is_clea.patch +Patch0239: 0239-Revert-ec-fini-Fix-race-with-ec_fini-and-ec_notify.patch +Patch0240: 0240-Revert-glusterd-shd-Optimize-the-glustershd-manager-.patch +Patch0241: 0241-Revert-glusterd-svc-glusterd_svcs_stop-should-call-i.patch +Patch0242: 0242-Revert-tests-shd-Add-test-coverage-for-shd-mux.patch +Patch0243: 0243-Revert-glusterfsd-cleanup-Protect-graph-object-under.patch +Patch0244: 0244-Revert-ec-shd-Cleanup-self-heal-daemon-resources-dur.patch +Patch0245: 0245-Revert-shd-glusterd-Serialize-shd-manager-to-prevent.patch +Patch0246: 0246-Revert-glusterd-shd-Keep-a-ref-on-volinfo-until-atta.patch +Patch0247: 0247-Revert-afr-shd-Cleanup-self-heal-daemon-resources-du.patch +Patch0248: 0248-Revert-shd-mux-Fix-coverity-issues-introduced-by-shd.patch +Patch0249: 0249-Revert-client-fini-return-fini-after-rpc-cleanup.patch +Patch0250: 0250-Revert-mgmt-shd-Implement-multiplexing-in-self-heal-.patch +Patch0251: 0251-tests-Fix-bug-1717819-metadata-split-brain-detection.patch +Patch0252: 0252-glusterd-do-not-mark-skip_locking-as-true-for-geo-re.patch +Patch0253: 0253-core-fix-deadlock-between-statedump-and-fd_anonymous.patch +Patch0254: 0254-Detach-iot_worker-to-release-its-resources.patch +Patch0255: 0255-Revert-tier-shd-glusterd-with-shd-mux-the-shd-volfil.patch +Patch0256: 
0256-features-snapview-server-use-the-same-volfile-server.patch +Patch0257: 0257-geo-rep-Test-case-for-upgrading-config-file.patch +Patch0258: 0258-geo-rep-Fix-mount-broker-setup-issue.patch +Patch0259: 0259-gluster-block-tuning-perf-options.patch +Patch0260: 0260-ctime-Set-mdata-xattr-on-legacy-files.patch +Patch0261: 0261-features-utime-Fix-mem_put-crash.patch +Patch0262: 0262-glusterd-ctime-Disable-ctime-by-default.patch +Patch0263: 0263-tests-fix-ctime-related-tests.patch +Patch0264: 0264-gfapi-Fix-deadlock-while-processing-upcall.patch +Patch0265: 0265-fuse-add-missing-GF_FREE-to-fuse_interrupt.patch +Patch0266: 0266-geo-rep-Fix-mount-broker-setup-issue.patch +Patch0267: 0267-posix-ctime-Fix-race-during-lookup-ctime-xattr-heal.patch +Patch0268: 0268-rpc-transport-have-default-listen-port.patch +Patch0269: 0269-ec-fix-truncate-lock-to-cover-the-write-in-tuncate-c.patch +Patch0270: 0270-cluster-ec-inherit-healing-from-lock-when-it-has-inf.patch +Patch0271: 0271-cluster-ec-fix-EIO-error-for-concurrent-writes-on-sp.patch +Patch0272: 0272-cluster-ec-Always-read-from-good-mask.patch +Patch0273: 0273-cluster-ec-Fix-reopen-flags-to-avoid-misbehavior.patch +Patch0274: 0274-cluster-ec-Update-lock-good_mask-on-parent-fop-failu.patch +Patch0275: 0275-cluster-ec-Create-heal-task-with-heal-process-id.patch +Patch0276: 0276-features-utime-always-update-ctime-at-setattr.patch +Patch0277: 0277-geo-rep-Fix-Config-Get-Race.patch +Patch0278: 0278-geo-rep-Fix-worker-connection-issue.patch +Patch0279: 0279-posix-In-brick_mux-brick-is-crashed-while-start-stop.patch +Patch0280: 0280-performance-md-cache-Do-not-skip-caching-of-null-cha.patch +Patch0281: 0281-ctime-Fix-incorrect-realtime-passed-to-frame-root-ct.patch +Patch0282: 0282-geo-rep-Fix-the-name-of-changelog-archive-file.patch +Patch0283: 0283-ctime-Fix-ctime-issue-with-utime-family-of-syscalls.patch +Patch0284: 0284-posix-log-aio_error-return-codes-in-posix_fs_health_.patch +Patch0285: 0285-glusterd-glusterd-service-is-getting-timed-out-on-sc.patch +Patch0286: 0286-glusterfs.spec.in-added-script-files-for-machine-com.patch +Patch0287: 0287-cluster-ec-Fail-fsync-flush-for-files-on-update-size.patch +Patch0288: 0288-cluster-ec-Fix-coverity-issues.patch +Patch0289: 0289-cluster-ec-quorum-count-implementation.patch +Patch0290: 0290-glusterd-tag-disperse.quorum-count-for-31306.patch +Patch0291: 0291-cluster-ec-Mark-release-only-when-it-is-acquired.patch +Patch0292: 0292-rpc-Update-address-family-if-it-is-not-provide-in-cm.patch +Patch0293: 0293-glusterd-IPV6-hostname-address-is-not-parsed-correct.patch +Patch0294: 0294-eventsapi-Set-IPv4-IPv6-family-based-on-input-IP.patch +Patch0295: 0295-ctime-rebalance-Heal-ctime-xattr-on-directory-during.patch +Patch0296: 0296-glusterfind-pre-command-failure-on-a-modify.patch +Patch0297: 0297-rpmbuild-fixing-the-build-errors-with-2a905a8ae.patch +Patch0298: 0298-geo-rep-fix-sub-command-during-worker-connection.patch +Patch0299: 0299-geo-rep-performance-improvement-while-syncing-rename.patch +Patch0300: 0300-cli-remove-the-warning-displayed-when-remove-brick-s.patch +Patch0301: 0301-posix-Brick-is-going-down-unexpectedly.patch +Patch0302: 0302-cluster-ec-prevent-filling-shd-log-with-table-not-fo.patch +Patch0303: 0303-posix-heketidbstorage-bricks-go-down-during-PVC-crea.patch +Patch0304: 0304-cluster-dht-Correct-fd-processing-loop.patch +Patch0305: 0305-glusterd-rebalance-start-should-fail-when-quorum-is-.patch +Patch0306: 0306-cli-fix-distCount-value.patch +Patch0307: 0307-ssl-fix-RHEL8-regression-failure.patch 
+Patch0308: 0308-dht-Rebalance-causing-IO-Error-File-descriptor-in-ba.patch +Patch0309: 0309-geo-rep-Fix-config-upgrade-on-non-participating-node.patch +Patch0310: 0310-tests-test-case-for-non-root-geo-rep-setup.patch +Patch0311: 0311-geo-rep-Fix-Permission-denied-traceback-on-non-root-.patch +Patch0312: 0312-Scripts-quota_fsck-script-KeyError-contri_size.patch +Patch0313: 0313-extras-Cgroup-CPU-Mem-restriction-are-not-working-on.patch +Patch0314: 0314-glusterd-tier-is_tier_enabled-inserted-causing-check.patch +Patch0315: 0315-geo-rep-Fix-py2-py3-compatibility-in-repce.patch +Patch0316: 0316-spec-fixed-python-prettytable-dependency-for-rhel6.patch +Patch0317: 0317-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0318: 0318-Update-rfc.sh-to-rhgs-3.5.1.patch +Patch0319: 0319-features-snapview-server-obtain-the-list-of-snapshot.patch +Patch0320: 0320-gf-event-Handle-unix-volfile-servers.patch +Patch0321: 0321-Adding-white-spaces-to-description-of-set-group.patch +Patch0322: 0322-glusterd-display-correct-rebalance-data-size-after-g.patch +Patch0323: 0323-cli-display-detailed-rebalance-info.patch +Patch0324: 0324-extras-hooks-Add-SELinux-label-on-new-bricks-during-.patch +Patch0325: 0325-extras-hooks-Install-and-package-newly-added-post-ad.patch +Patch0326: 0326-tests-subdir-mount.t-is-failing-for-brick_mux-regrss.patch +Patch0327: 0327-glusterfind-integrate-with-gfid2path.patch +Patch0328: 0328-glusterd-Add-warning-and-abort-in-case-of-failures-i.patch +Patch0329: 0329-cluster-afr-Heal-entries-when-there-is-a-source-no-h.patch +Patch0330: 0330-mount.glusterfs-change-the-error-message.patch +Patch0331: 0331-features-locks-Do-special-handling-for-op-version-3..patch +Patch0332: 0332-Removing-one-top-command-from-gluster-v-help.patch +Patch0333: 0333-rpc-Synchronize-slot-allocation-code.patch +Patch0334: 0334-dht-log-getxattr-failure-for-node-uuid-at-DEBUG.patch +Patch0335: 0335-tests-RHEL8-test-failure-fixes-for-RHGS.patch +Patch0336: 0336-spec-check-and-return-exit-code-in-rpm-scripts.patch +Patch0337: 0337-fuse-Set-limit-on-invalidate-queue-size.patch +Patch0338: 0338-glusterfs-fuse-Reduce-the-default-lru-limit-value.patch +Patch0339: 0339-geo-rep-fix-integer-config-validation.patch +Patch0340: 0340-rpc-event_slot_alloc-converted-infinite-loop-after-r.patch +Patch0341: 0341-socket-fix-error-handling.patch +Patch0342: 0342-Revert-hooks-remove-selinux-hooks.patch +Patch0343: 0343-extras-hooks-syntactical-errors-in-SELinux-hooks-sci.patch +Patch0344: 0344-Revert-all-fixes-to-include-SELinux-hook-scripts.patch +Patch0345: 0345-read-ahead-io-cache-turn-off-by-default.patch +Patch0346: 0346-fuse-degrade-logging-of-write-failure-to-fuse-device.patch +Patch0347: 0347-tools-glusterfind-handle-offline-bricks.patch +Patch0348: 0348-glusterfind-Fix-py2-py3-issues.patch +Patch0349: 0349-glusterfind-python3-compatibility.patch +Patch0350: 0350-tools-glusterfind-Remove-an-extra-argument.patch +Patch0351: 0351-server-Mount-fails-after-reboot-1-3-gluster-nodes.patch +Patch0352: 0352-spec-fixed-missing-dependencies-for-glusterfs-clouds.patch +Patch0353: 0353-build-glusterfs-ganesha-pkg-requires-python3-policyc.patch +Patch0354: 0354-core-fix-memory-pool-management-races.patch +Patch0355: 0355-core-Prevent-crash-on-process-termination.patch +Patch0356: 0356-Update-rfc.sh-to-rhgs-3.5.1-rhel-8.patch +Patch0357: 0357-ganesha-ha-updates-for-pcs-0.10.x-i.e.-in-Fedora-29-.patch +Patch0358: 0358-inode-fix-wrong-loop-count-in-__inode_ctx_free.patch +Patch0359: 0359-dht-gf_defrag_process_dir-is-called-even-if-gf_defra.patch 
+Patch0360: 0360-rpc-Make-ssl-log-more-useful.patch +Patch0361: 0361-snap_scheduler-python3-compatibility-and-new-test-ca.patch +Patch0362: 0362-write-behind-fix-data-corruption.patch +Patch0363: 0363-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch +Patch0364: 0364-dht-fixing-rebalance-failures-for-files-with-holes.patch +Patch0365: 0365-build-geo-rep-requires-relevant-selinux-permission-f.patch +Patch0366: 0366-snapshot-fix-python3-issue-in-gcron.patch +Patch0367: 0367-dht-Handle-setxattr-and-rm-race-for-directory-in-reb.patch +Patch0368: 0368-Update-rfc.sh-to-rhgs-3.5.2.patch +Patch0369: 0369-cluster-ec-Return-correct-error-code-and-log-message.patch +Patch0370: 0370-dht-Do-opendir-selectively-in-gf_defrag_process_dir.patch +Patch0371: 0371-common-ha-cluster-status-shows-FAILOVER-when-actuall.patch +Patch0372: 0372-posix-fix-seek-functionality.patch +Patch0373: 0373-build-geo-rep-sub-pkg-requires-policycoreutils-pytho.patch +Patch0374: 0374-open-behind-fix-missing-fd-reference.patch +Patch0375: 0375-features-shard-Send-correct-size-when-reads-are-sent.patch +Patch0376: 0376-features-shard-Fix-crash-during-shards-cleanup-in-er.patch +Patch0377: 0377-syncop-improve-scaling-and-implement-more-tools.patch +Patch0378: 0378-Revert-open-behind-fix-missing-fd-reference.patch +Patch0379: 0379-glusterd-add-missing-synccond_broadcast.patch +Patch0380: 0380-features-shard-Aggregate-size-block-count-in-iatt-be.patch +Patch0381: 0381-dht-add-null-check-in-gf_defrag_free_dir_dfmeta.patch +Patch0382: 0382-features-shard-Aggregate-file-size-block-count-befor.patch +Patch0383: 0383-common-ha-ganesha-ha.sh-bad-test-for-rhel-centos-for.patch +Patch0384: 0384-Update-rfc.sh-to-rhgs-3.5.3.patch +Patch0385: 0385-glusterd-start-glusterd-automatically-on-abnormal-sh.patch +Patch0386: 0386-glusterd-increase-the-StartLimitBurst.patch +Patch0387: 0387-To-fix-readdir-ahead-memory-leak.patch +Patch0388: 0388-rpc-Cleanup-SSL-specific-data-at-the-time-of-freeing.patch +Patch0389: 0389-posix-Avoid-diskpace-error-in-case-of-overwriting-th.patch +Patch0390: 0390-glusterd-deafult-options-after-volume-reset.patch +Patch0391: 0391-glusterd-unlink-the-file-after-killing-the-process.patch +Patch0392: 0392-glusterd-Brick-process-fails-to-come-up-with-brickmu.patch +Patch0393: 0393-afr-restore-timestamp-of-files-during-metadata-heal.patch +Patch0394: 0394-man-gluster-Add-volume-top-command-to-gluster-man-pa.patch +Patch0395: 0395-Cli-Removing-old-log-rotate-command.patch +Patch0396: 0396-Updating-gluster-manual.patch +Patch0397: 0397-mgmt-brick-mux-Avoid-sending-two-response-when-attac.patch +Patch0398: 0398-ec-change-error-message-for-heal-commands-for-disper.patch +Patch0399: 0399-glusterd-coverity-fixes.patch +Patch0400: 0400-cli-throw-a-warning-if-replica-count-greater-than-3.patch +Patch0401: 0401-cli-change-the-warning-message.patch +Patch0402: 0402-afr-wake-up-index-healer-threads.patch +Patch0403: 0403-Fix-spurious-failure-in-bug-1744548-heal-timeout.t.patch +Patch0404: 0404-tests-Fix-spurious-failure.patch +Patch0405: 0405-core-fix-return-of-local-in-__nlc_inode_ctx_get.patch +Patch0406: 0406-afr-support-split-brain-CLI-for-replica-3.patch +Patch0407: 0407-geo-rep-Improving-help-message-in-schedule_georep.py.patch +Patch0408: 0408-geo-rep-Fix-ssh-port-validation.patch +Patch0409: 0409-system-posix-acl-update-ctx-only-if-iatt-is-non-NULL.patch +Patch0410: 0410-afr-prevent-spurious-entry-heals-leading-to-gfid-spl.patch +Patch0411: 0411-tools-glusterfind-validate-session-name.patch +Patch0412: 
0412-gluster-smb-add-smb-parameter-when-access-gluster-by.patch +Patch0413: 0413-extras-hooks-Remove-smb.conf-parameter-allowing-gues.patch +Patch0414: 0414-cluster-syncop-avoid-duplicate-unlock-of-inodelk-ent.patch +Patch0415: 0415-dht-Fix-stale-layout-and-create-issue.patch +Patch0416: 0416-tests-fix-spurious-failure-of-bug-1402841.t-mt-dir-s.patch +Patch0417: 0417-events-fix-IPv6-memory-corruption.patch +Patch0418: 0418-md-cache-avoid-clearing-cache-when-not-necessary.patch +Patch0419: 0419-cluster-afr-fix-race-when-bricks-come-up.patch +Patch0420: 0420-scripts-quota_fsck-script-TypeError-d-format-not-dic.patch +Patch0421: 0421-Improve-logging-in-EC-client-and-lock-translator.patch +Patch0422: 0422-cluster-afr-Prioritize-ENOSPC-over-other-errors.patch +Patch0423: 0423-ctime-Fix-ctime-inconsisteny-with-utimensat.patch +Patch0424: 0424-afr-make-heal-info-lockless.patch +Patch0425: 0425-tests-Fix-spurious-self-heald.t-failure.patch +Patch0426: 0426-geo-rep-Fix-for-Transport-End-Point-not-connected-is.patch +Patch0427: 0427-storage-posix-Fixing-a-coverity-issue.patch +Patch0428: 0428-glusterd-ganesha-fixing-resource-leak-in-tear_down_c.patch +Patch0429: 0429-dht-rebalance-fixing-failure-occurace-due-to-rebalan.patch +Patch0430: 0430-Fix-some-Null-pointer-dereference-coverity-issues.patch +Patch0431: 0431-glusterd-check-for-same-node-while-adding-bricks-in-.patch +Patch0432: 0432-glusterd-Fix-coverity-defects-put-coverity-annotatio.patch +Patch0433: 0433-socket-Resolve-ssl_ctx-leak-for-a-brick-while-only-m.patch +Patch0434: 0434-glusterd-ganesha-fix-Coverity-CID-1405785.patch +Patch0435: 0435-glusterd-coverity-fix.patch +Patch0436: 0436-glusterd-coverity-fixes.patch +Patch0437: 0437-glusterd-prevent-use-after-free-in-glusterd_op_ac_se.patch +Patch0438: 0438-dht-sparse-files-rebalance-enhancements.patch +Patch0439: 0439-cluster-afr-Delay-post-op-for-fsync.patch +Patch0440: 0440-glusterd-snapshot-Improve-log-message-during-snapsho.patch +Patch0441: 0441-fuse-occasional-logging-for-fuse-device-weird-write-.patch +Patch0442: 0442-fuse-correctly-handle-setxattr-values.patch +Patch0443: 0443-fuse-fix-high-sev-coverity-issue.patch +Patch0444: 0444-mount-fuse-Fixing-a-coverity-issue.patch +Patch0445: 0445-feature-changelog-Avoid-thread-creation-if-xlator-is.patch +Patch0446: 0446-bitrot-Make-number-of-signer-threads-configurable.patch +Patch0447: 0447-core-brick_mux-brick-crashed-when-creating-and-delet.patch +Patch0448: 0448-Posix-Use-simple-approach-to-close-fd.patch +Patch0449: 0449-test-Test-case-brick-mux-validation-in-cluster.t-is-.patch +Patch0450: 0450-tests-basic-ctime-enable-ctime-before-testing.patch +Patch0451: 0451-extras-Modify-group-virt-to-include-network-related-.patch +Patch0452: 0452-Tier-DHT-Handle-the-pause-case-missed-out.patch +Patch0453: 0453-glusterd-add-brick-command-failure.patch +Patch0454: 0454-features-locks-avoid-use-after-freed-of-frame-for-bl.patch +Patch0455: 0455-locks-prevent-deletion-of-locked-entries.patch +Patch0456: 0456-add-clean-local-after-grant-lock.patch +Patch0457: 0457-cluster-ec-Improve-detection-of-new-heals.patch +Patch0458: 0458-features-bit-rot-stub-clean-the-mutex-after-cancelli.patch +Patch0459: 0459-features-bit-rot-Unconditionally-sign-the-files-duri.patch +Patch0460: 0460-cluster-ec-Remove-stale-entries-from-indices-xattrop.patch +Patch0461: 0461-geo-replication-Fix-IPv6-parsing.patch +Patch0462: 0462-Issue-with-gf_fill_iatt_for_dirent.patch +Patch0463: 0463-cluster-ec-Change-handling-of-heal-failure-to-avoid-.patch +Patch0464: 
0464-storage-posix-Remove-nr_files-usage.patch +Patch0465: 0465-posix-Implement-a-janitor-thread-to-close-fd.patch +Patch0466: 0466-cluster-ec-Change-stale-index-handling.patch +Patch0467: 0467-build-Added-dependency-for-glusterfs-selinux.patch +Patch0468: 0468-build-Update-the-glusterfs-selinux-version.patch +Patch0469: 0469-cluster-ec-Don-t-trigger-heal-for-stale-index.patch +Patch0470: 0470-extras-snap_scheduler-changes-in-gluster-shared-stor.patch +Patch0471: 0471-nfs-ganesha-gluster_shared_storage-fails-to-automoun.patch +Patch0472: 0472-geo-rep-gluster_shared_storage-fails-to-automount-on.patch +Patch0473: 0473-glusterd-Fix-Add-brick-with-increasing-replica-count.patch +Patch0474: 0474-features-locks-posixlk-clear-lock-should-set-error-a.patch +Patch0475: 0475-fuse-lock-interrupt-fix-flock_interrupt.t.patch +Patch0476: 0476-mount-fuse-use-cookies-to-get-fuse-interrupt-record-.patch +Patch0477: 0477-glusterd-snapshot-Snapshot-prevalidation-failure-not.patch +Patch0478: 0478-DHT-Fixing-rebalance-failure-on-issuing-stop-command.patch +Patch0479: 0479-ganesha-ha-revised-regex-exprs-for-status.patch +Patch0480: 0480-DHT-Rebalance-Ensure-Rebalance-reports-status-only-o.patch +Patch0481: 0481-RHGS-3.5.3-rebuild-to-ship-with-RHEL.patch +Patch0482: 0482-logger-Always-print-errors-in-english.patch +Patch0483: 0483-afr-more-quorum-checks-in-lookup-and-new-entry-marki.patch +Patch0484: 0484-glusterd-rebalance-status-displays-stats-as-0-after-.patch +Patch0485: 0485-cli-rpc-conditional-init-of-global-quota-rpc-1578.patch +Patch0486: 0486-glusterd-brick-sock-file-deleted-log-error-1560.patch +Patch0487: 0487-Events-Log-file-not-re-opened-after-logrotate.patch +Patch0488: 0488-glusterd-afr-enable-granular-entry-heal-by-default.patch +Patch0489: 0489-glusterd-fix-bug-in-enabling-granular-entry-heal.patch +Patch0490: 0490-Segmentation-fault-occurs-during-truncate.patch +Patch0491: 0491-glusterd-mount-directory-getting-truncated-on-mounti.patch +Patch0492: 0492-afr-lookup-Pass-xattr_req-in-while-doing-a-selfheal-.patch +Patch0493: 0493-geo-rep-Note-section-is-required-for-ignore_deletes.patch +Patch0494: 0494-glusterd-start-the-brick-on-a-different-port.patch +Patch0495: 0495-geo-rep-descriptive-message-when-worker-crashes-due-.patch +Patch0496: 0496-posix-Use-MALLOC-instead-of-alloca-to-allocate-memor.patch +Patch0497: 0497-socket-Use-AES128-cipher-in-SSL-if-AES-is-supported-.patch +Patch0498: 0498-geo-rep-Fix-corner-case-in-rename-on-mkdir-during-hy.patch +Patch0499: 0499-gfapi-give-appropriate-error-when-size-exceeds.patch +Patch0500: 0500-features-shard-Convert-shard-block-indices-to-uint64.patch +Patch0501: 0501-Cli-Removing-old-syntax-of-tier-cmds-from-help-menu.patch +Patch0502: 0502-dht-fixing-a-permission-update-issue.patch +Patch0503: 0503-gfapi-Suspend-synctasks-instead-of-blocking-them.patch +Patch0504: 0504-io-stats-Configure-ios_sample_buf_size-based-on-samp.patch +Patch0505: 0505-trash-Create-inode_table-only-while-feature-is-enabl.patch +Patch0506: 0506-posix-Attach-a-posix_spawn_disk_thread-with-glusterf.patch +Patch0507: 0507-inode-make-critical-section-smaller.patch +Patch0508: 0508-fuse-fetch-arbitrary-number-of-groups-from-proc-pid-.patch +Patch0509: 0509-core-configure-optimum-inode-table-hash_size-for-shd.patch +Patch0510: 0510-glusterd-brick_mux-Optimize-friend-handshake-code-to.patch +Patch0511: 0511-features-shard-Missing-format-specifier.patch +Patch0512: 0512-glusterd-shared-storage-mount-fails-in-ipv6-environm.patch +Patch0513: 
0513-afr-mark-pending-xattrs-as-a-part-of-metadata-heal.patch +Patch0514: 0514-afr-event-gen-changes.patch +Patch0515: 0515-cluster-afr-Heal-directory-rename-without-rmdir-mkdi.patch +Patch0516: 0516-afr-return-EIO-for-gfid-split-brains.patch +Patch0517: 0517-gfapi-glfs_h_creat_open-new-API-to-create-handle-and.patch +Patch0518: 0518-glusterd-Fix-for-shared-storage-in-ipv6-env.patch +Patch0519: 0519-glusterfs-events-Fix-incorrect-attribute-access-2002.patch +Patch0520: 0520-performance-open-behind-seek-fop-should-open_and_res.patch +Patch0521: 0521-open-behind-fix-missing-fd-reference.patch +Patch0522: 0522-lcov-improve-line-coverage.patch +Patch0523: 0523-open-behind-rewrite-of-internal-logic.patch +Patch0524: 0524-open-behind-fix-call_frame-leak.patch +Patch0525: 0525-open-behind-implement-create-fop.patch +Patch0526: 0526-Quota-quota_fsck.py-converting-byte-string-to-string.patch +Patch0527: 0527-Events-Socket-creation-after-getaddrinfo-and-IPv4-an.patch +Patch0528: 0528-Extras-Removing-xattr_analysis-script.patch +Patch0529: 0529-geo-rep-prompt-should-work-for-ignore_deletes.patch +Patch0530: 0530-gfapi-avoid-crash-while-logging-message.patch +Patch0531: 0531-Glustereventsd-Default-port-change-2091.patch +Patch0532: 0532-glusterd-fix-for-starting-brick-on-new-port.patch +Patch0533: 0533-glusterd-Rebalance-cli-is-not-showing-correct-status.patch +Patch0534: 0534-glusterd-Resolve-use-after-free-bug-2181.patch +Patch0535: 0535-multiple-files-use-dict_allocate_and_serialize-where.patch +Patch0536: 0536-dht-Ongoing-IO-is-failed-during-volume-shrink-operat.patch +Patch0537: 0537-cluster-afr-Fix-race-in-lockinfo-f-getxattr.patch +Patch0538: 0538-afr-fix-coverity-issue-introduced-by-90cefde.patch +Patch0539: 0539-extras-disable-lookup-optimize-in-virt-and-block-gro.patch +Patch0540: 0540-extras-Disable-write-behind-for-group-samba.patch +Patch0541: 0541-glusterd-volgen-Add-functionality-to-accept-any-cust.patch +Patch0542: 0542-xlaotrs-mgmt-Fixing-coverity-issue-1445996.patch +Patch0543: 0543-glusterd-handle-custom-xlator-failure-cases.patch +Patch0900: 0900-rhel-9.0-beta-build-fixing-gcc-10-and-LTO-errors.patch +Patch0901: 0901-contrib-remove-contrib-sunrpc-xdr_sizeof.c.patch + +%description +GlusterFS is a distributed file-system capable of scaling to several +petabytes. It aggregates various storage bricks over Infiniband RDMA +or TCP/IP interconnect into one large parallel network file +system. GlusterFS is one of the most sophisticated file systems in +terms of features and extensibility. It borrows a powerful concept +called Translators from GNU Hurd kernel. Much of the code in GlusterFS +is in user space and easily manageable. + +This package includes the glusterfs binary, the glusterfsd daemon and the +libglusterfs and glusterfs translator modules common to both GlusterFS server +and client framework. + +%package api +Summary: GlusterFS api library +Requires: %{name}%{?_isa} = %{version}-%{release} +Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} + +%description api +GlusterFS is a distributed file-system capable of scaling to several +petabytes. It aggregates various storage bricks over Infiniband RDMA +or TCP/IP interconnect into one large parallel network file +system. GlusterFS is one of the most sophisticated file systems in +terms of features and extensibility. It borrows a powerful concept +called Translators from GNU Hurd kernel. Much of the code in GlusterFS +is in user space and easily manageable. 
+
+This package provides the glusterfs libgfapi library.
+
+%package api-devel
+Summary: Development Libraries
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-devel%{?_isa} = %{version}-%{release}
+Requires: libacl-devel
+Requires: %{name}-api%{?_isa} = %{version}-%{release}
+
+%description api-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the api include files.
+
+%package cli
+Summary: GlusterFS CLI
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description cli
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the GlusterFS CLI application and its man page.
+
+%package cloudsync-plugins
+Summary: Cloudsync Plugins
+BuildRequires: libcurl-devel
+Requires: glusterfs-libs = %{version}-%{release}
+
+%description cloudsync-plugins
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides cloudsync plugins for the archival feature.
+
+%package devel
+Summary: Development Libraries
+Requires: %{name}%{?_isa} = %{version}-%{release}
+# Needed for the Glupy examples to work
+%if ( 0%{!?_without_extra_xlators:1} )
+Requires: %{name}-extra-xlators%{?_isa} = %{version}-%{release}
+%endif
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{!?_without_server:1} )
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+%endif
+
+%description devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the development libraries and include files.
+
+%if ( 0%{!?_without_extra_xlators:1} )
+%package extra-xlators
+Summary: Extra Gluster filesystem Translators
+# We need python-gluster rpm for gluster module's __init__.py in Python
+# site-packages area
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+Requires: python%{_pythonver}
+
+%description extra-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes.
It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides extra filesystem Translators, such as Glupy,
+for GlusterFS.
+%endif
+
+%package fuse
+Summary: Fuse client
+BuildRequires: fuse-devel
+Requires: attr
+Requires: psmisc
+
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+
+Obsoletes: %{name}-client < %{version}-%{release}
+Provides: %{name}-client = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description fuse
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support to FUSE-based clients and includes the
+glusterfs(d) binary.
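+
+# (illustrative usage note, not consumed by the build: a volume is mounted
+# through this client as, e.g.,
+#   mount -t glusterfs <server>:/<volname> /mnt/point
+# which runs the mount.glusterfs helper shipped here, which in turn starts
+# the glusterfs client process.)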
+
+%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 )
+%package ganesha
+Summary: NFS-Ganesha configuration
+Group: Applications/File
+
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: nfs-ganesha-selinux >= 2.7.3
+Requires: nfs-ganesha-gluster >= 2.7.3
+Requires: pcs, dbus
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: cman, pacemaker, corosync
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
+# we need portblock resource-agent in 3.9.5 and later.
+Requires: resource-agents >= 3.9.5
+Requires: net-tools
+%endif
+
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: selinux-policy >= 3.13.1-160
+Requires(post): policycoreutils-python
+Requires(postun): policycoreutils-python
+%else
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+%endif
+%endif
+
+%description ganesha
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the configuration and related files for using
+NFS-Ganesha as the NFS server using GlusterFS.
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%package geo-replication
+Summary: GlusterFS Geo-replication
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver}
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires: python-prettytable
+%else
+Requires: python%{_pythonver}-prettytable
+%endif
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+
+Requires: rsync
+Requires: util-linux
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+# required for setting selinux bools
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+Requires: selinux-policy-targeted
+Requires(post): selinux-policy-targeted
+BuildRequires: selinux-policy-devel
+%endif
+
+%description geo-replication
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support to geo-replication.
+%endif
+
+%package libs
+Summary: GlusterFS common libraries
+
+%description libs
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the base GlusterFS libraries.
+
+%package -n python%{_pythonver}-gluster
+Summary: GlusterFS python library
+Requires: python%{_pythonver}
+%if ( ! %{_usepython3} )
+%{?python_provide:%python_provide python-gluster}
+Provides: python-gluster = %{version}-%{release}
+Obsoletes: python-gluster < 3.10
+%endif
+
+%description -n python%{_pythonver}-gluster
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package contains the python modules of GlusterFS and its own
+gluster namespace.
+
+%if ( 0%{!?_without_rdma:1} )
+%package rdma
+Summary: GlusterFS rdma support for ib-verbs
+%if ( 0%{?fedora} && 0%{?fedora} > 26 )
+BuildRequires: rdma-core-devel
+%else
+BuildRequires: libibverbs-devel
+BuildRequires: librdmacm-devel >= 1.0.15
+%endif
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+
+%description rdma
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system.
GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support to the ib-verbs library.
+%endif
+
+%if ( 0%{!?_without_regression_tests:1} )
+%package regression-tests
+Summary: Development Tools
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+## thin provisioning support
+Requires: lvm2 >= 2.02.89
+Requires: perl(App::Prove) perl(Test::Harness) gcc util-linux-ng
+Requires: python%{_pythonver}
+Requires: attr dbench file git libacl-devel net-tools
+Requires: nfs-utils xfsprogs yajl psmisc bc
+
+%description regression-tests
+The Gluster Test Framework is a suite of scripts used for
+regression testing of Gluster.
+%endif
+
+%if ( 0%{!?_without_ocf:1} )
+%package resource-agents
+Summary: OCF Resource Agents for GlusterFS
+License: GPLv3+
+BuildArch: noarch
+# this Group handling comes from the Fedora resource-agents package
+# for glusterd
+Requires: %{name}-server = %{version}-%{release}
+# depending on the distribution, we need pacemaker or resource-agents
+Requires: %{_prefix}/lib/ocf/resource.d
+
+%description resource-agents
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the resource agents which plug glusterd into
+Open Cluster Framework (OCF) compliant cluster resource managers,
+like Pacemaker.
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%package server
+Summary: Clustered file-system server
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-cli%{?_isa} = %{version}-%{release}
+Requires: %{name}-libs%{?_isa} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+Requires: glusterfs-selinux >= 1.0-1
+%endif
+# some daemons (like quota) use a fuse-mount; glusterfsd is part of -fuse
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+# self-heal daemon, rebalance, nfs-server etc.
are actually clients +Requires: %{name}-api%{?_isa} = %{version}-%{release} +Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release} +# lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server +Requires: lvm2 +Requires: nfs-utils +%if ( 0%{?_with_systemd:1} ) +%{?systemd_requires} +%else +Requires(post): /sbin/chkconfig +Requires(preun): /sbin/service +Requires(preun): /sbin/chkconfig +Requires(postun): /sbin/service +%endif +%if (0%{?_with_firewalld:1}) +# we install firewalld rules, so we need to have the directory owned +%if ( 0%{!?rhel} ) +# not on RHEL because firewalld-filesystem appeared in 7.3 +# when EL7 rpm gets weak dependencies we can add a Suggests: +Requires: firewalld-filesystem +%endif +%endif +%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) +Requires: rpcbind +%else +Requires: portmap +%endif +%if ( 0%{?rhel} && 0%{?rhel} <= 6 ) +Requires: python-argparse +%endif +%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 ) +Requires: python%{_pythonver}-pyxattr +%else +Requires: pyxattr +%endif +%if (0%{?_with_valgrind:1}) +Requires: valgrind +%endif + +%description server +GlusterFS is a distributed file-system capable of scaling to several +petabytes. It aggregates various storage bricks over Infiniband RDMA +or TCP/IP interconnect into one large parallel network file +system. GlusterFS is one of the most sophisticated file systems in +terms of features and extensibility. It borrows a powerful concept +called Translators from GNU Hurd kernel. Much of the code in GlusterFS +is in user space and easily manageable. + +This package provides the glusterfs server daemon. +%endif + +%package client-xlators +Summary: GlusterFS client-side translators +Requires: %{name}-libs%{?_isa} = %{version}-%{release} + +%description client-xlators +GlusterFS is a distributed file-system capable of scaling to several +petabytes. It aggregates various storage bricks over Infiniband RDMA +or TCP/IP interconnect into one large parallel network file +system. GlusterFS is one of the most sophisticated file systems in +terms of features and extensibility. It borrows a powerful concept +called Translators from GNU Hurd kernel. Much of the code in GlusterFS +is in user space and easily manageable. + +This package provides the translators needed on any GlusterFS client. 
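+
+# (hypothetical post-build check, not used by the spec itself: the strict
+# version pins above can be verified on a built rpm with, e.g.,
+#   rpm -qp --requires glusterfs-client-xlators-*.rpm | grep glusterfs-libs
+# which should print the exact %%{version}-%%{release} dependency.)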
+
+%if ( 0%{!?_without_events:1} )
+%package events
+Summary: GlusterFS Events
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver}
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: python-requests
+%else
+Requires: python%{_pythonver}-requests
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires: python-prettytable
+Requires: python-argparse
+%else
+Requires: python%{_pythonver}-prettytable
+%endif
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
+
+%description events
+This package provides the GlusterFS events daemon (glustereventsd) and the
+gluster-eventsapi command used to manage it.
+
+%endif
+
+%prep
+%setup -q -n %{name}-%{version}%{?prereltag}
+
+# sanitization scriptlet for patches with file renames
+ls %{_topdir}/SOURCES/*.patch | sort | \
+while read p
+do
+    # if the destination file exists, it's most probably stale
+    # so we must remove it
+    rename_to=( $(grep -i 'rename to' $p | cut -f 3 -d ' ') )
+    if [ ${#rename_to[*]} -gt 0 ]; then
+        for f in ${rename_to[*]}
+        do
+            if [ -f $f ]; then
+                rm -f $f
+            elif [ -d $f ]; then
+                rm -rf $f
+            fi
+        done
+    fi
+
+    SOURCE_FILES=( $(egrep '^\-\-\- a/' $p | cut -f 2- -d '/') )
+    DEST_FILES=( $(egrep '^\+\+\+ b/' $p | cut -f 2- -d '/') )
+    EXCLUDE_DOCS=()
+    for idx in ${!SOURCE_FILES[@]}; do
+        # collect doc files that need to be excluded
+        source_file=${SOURCE_FILES[$idx]}
+        dest_file=${DEST_FILES[$idx]}
+        if [[ "$dest_file" =~ ^doc/.+ ]]; then
+            if [ "$source_file" != "dev/null" ] && [ ! -f "$dest_file" ]; then
+                # if patch is being applied to a doc file and if the doc file
+                # hasn't been added so far then we need to exclude it
+                EXCLUDE_DOCS=( ${EXCLUDE_DOCS[*]} "$dest_file" )
+            fi
+        fi
+    done
+    EXCLUDE_DOCS_OPT=""
+    # iterate over every excluded doc, not just the first array element
+    for doc in ${EXCLUDE_DOCS[*]}; do
+        EXCLUDE_DOCS_OPT="--exclude=$doc $EXCLUDE_DOCS_OPT"
+    done
+
+    # HACK to fix build
+    bn=$(basename $p)
+    if [ "$bn" == "0085-Revert-all-remove-code-which-is-not-being-considered.patch" ]; then
+        (patch -p1 -u -F3 < $p || :)
+        if [ -f libglusterfs/Makefile.am.rej ]; then
+            sed -i -e 's/^SUBDIRS = src/SUBDIRS = src src\/gfdb/g;s/^CLEANFILES = /CLEANFILES =/g' libglusterfs/Makefile.am
+        fi
+    elif [ "$bn" == "0053-Revert-packaging-ganesha-remove-glusterfs-ganesha-su.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0055-Revert-storhaug-HA-first-step-remove-resource-agents.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0090-Revert-build-conditionally-build-legacy-gNFS-server-.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0117-spec-Remove-thin-arbiter-package.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0023-hooks-remove-selinux-hooks.patch" ]; then
+        (patch -p1 < $p || :)
+    elif [ "$bn" == "0042-spec-client-server-Builds-are-failing-on-rhel-6.patch" ]; then
+        (patch -p1 < $p || :)
+    else
+        # apply the patch with 'git apply'
+        git apply -p1 --exclude=rfc.sh \
+            --exclude=.gitignore \
+            --exclude=.testignore \
+            --exclude=MAINTAINERS \
+            --exclude=extras/checkpatch.pl \
+            --exclude=build-aux/checkpatch.pl \
+            --exclude='tests/*' \
+            ${EXCLUDE_DOCS_OPT} \
+            $p
+    fi
+
+done
+
+echo "fixing python shebangs..."
+%if ( %{_usepython3} )
+    for i in `find . -type f -exec bash -c "if file {} | grep 'Python script, ASCII text executable' >/dev/null; then echo {}; fi" ';'`; do
+        sed -i -e 's|^#!/usr/bin/python.*|#!%{__python3}|' -e 's|^#!/usr/bin/env python.*|#!%{__python3}|' $i
+    done
+%else
+    for f in api events extras geo-replication libglusterfs tools xlators; do
+        find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
+    done
+%endif
+
+%build
+
+# On RHEL7 a few hardening flags are enabled by default, but the default
+# RELRO behaviour is only partial, so convert it to full
+%if ( 0%{?rhel} && 0%{?rhel} >= 7 )
+LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
+export LDFLAGS
+%else
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+CFLAGS="$RPM_OPT_FLAGS -fPIE -DPIE"
+LDFLAGS="$RPM_LD_FLAGS -pie -Wl,-z,relro,-z,now"
+%else
+# It appears that with gcc-4.1.2 in RHEL5 there is an issue using both -fPIC
+# and -fPIE that makes -z relro not work; -fPIE seems to undo what -fPIC does
+CFLAGS="$CFLAGS $RPM_OPT_FLAGS"
+LDFLAGS="$RPM_LD_FLAGS -Wl,-z,relro,-z,now"
+%endif
+export CFLAGS
+export LDFLAGS
+%endif
+
+./autogen.sh && %configure \
+        %{?_with_asan} \
+        %{?_with_cmocka} \
+        %{?_with_debug} \
+        %{?_with_firewalld} \
+        %{?_with_tmpfilesdir} \
+        %{?_with_tsan} \
+        %{?_with_valgrind} \
+        %{?_without_epoll} \
+        %{?_without_events} \
+        %{?_without_fusermount} \
+        %{?_without_georeplication} \
+        %{?_without_ocf} \
+        %{?_without_rdma} \
+        %{?_without_server} \
+        %{?_without_syslog} \
+        %{?_without_tiering} \
+        %{?_with_ipv6default} \
+        %{?_without_libtirpc}
+
+# fix hardening and remove rpath in shlibs
+%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+sed -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+%endif
+sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
+
+make %{?_smp_mflags}
+
+%check
+make check
+
+%install
+rm -rf %{buildroot}
+make install DESTDIR=%{buildroot}
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+install -D -p -m 0644 %{SOURCE1} \
+    %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+install -D -p -m 0644 %{SOURCE2} \
+    %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
+%else
+install -D -p -m 0644 extras/glusterd-sysconfig \
+    %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%endif
+%endif
+
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterd
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfs
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfsd
+mkdir -p %{buildroot}%{_rundir}/gluster
+
+# Remove static archives and libtool files shipped with the shared libraries
+find %{buildroot}%{_libdir} -name '*.a' -delete
+find %{buildroot}%{_libdir} -name '*.la' -delete
+
+# Remove installed docs; the ones we want are included by %%doc, in
+# /usr/share/doc/glusterfs or /usr/share/doc/glusterfs-x.y.z depending
+# on the distribution
+%if ( 0%{?fedora} && 0%{?fedora} > 19 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+rm -rf %{buildroot}%{_pkgdocdir}/*
+%else
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}
+mkdir -p %{buildroot}%{_pkgdocdir}
+%endif
+head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
+cat << EOM >> ChangeLog
+
+More commit messages for this ChangeLog can be found at
+https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prereltag}
+EOM
+
+# Remove benchmarking and other unpackaged files
+# make install always puts these in %%{_defaultdocdir}/%%{name} so don't
+# use %%{_pkgdocdir}; that will be wrong on later Fedora distributions
+rm -rf 
%{buildroot}%{_defaultdocdir}/%{name}/benchmarking +rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs-mode.el +rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs.vim + +%if ( 0%{!?_without_server:1} ) +# Create working directory +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd + +# Update configuration file to /var/lib working directory +sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \ + %{buildroot}%{_sysconfdir}/glusterfs/glusterd.vol +%endif + +# Install glusterfsd .service or init.d file +%if ( 0%{!?_without_server:1} ) +%if ( 0%{_for_fedora_koji_builds} ) +%service_install glusterfsd %{glusterfsd_svcfile} +%endif +%endif + +install -D -p -m 0644 extras/glusterfs-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs + +# ganesha ghosts +%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 ) +mkdir -p %{buildroot}%{_sysconfdir}/ganesha +touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf +mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ +touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf +touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf +%endif + +%if ( 0%{!?_without_georeplication:1} ) +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication +touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf +install -D -p -m 0644 extras/glusterfs-georep-logrotate \ + %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep +%endif + +%if ( 0%{!?_without_server:1} ) +touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info +touch %{buildroot}%{_sharedstatedir}/glusterd/options +subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop) +for dir in ${subdirs[@]}; do + mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/"$dir"/{pre,post} +done +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/glustershd +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/peers +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/vols +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/bitd +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/quotad +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/scrub +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/snaps +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/ss_brick +touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol +touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid +%endif + +find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs + +## Install bash completion for cli +install -p -m 0744 -D extras/command-completion/gluster.bash \ + %{buildroot}%{_sysconfdir}/bash_completion.d/gluster + +%if ( 0%{!?_without_server:1} ) +echo "RHGS 3.5" > %{buildroot}%{_datadir}/glusterfs/release +%endif + +%clean +rm -rf %{buildroot} + +##----------------------------------------------------------------------------- +## All %%post should be placed here and keep them sorted +## +%post +/sbin/ldconfig +%if ( 0%{!?_without_syslog:1} ) +%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) +%systemd_postun_with_restart rsyslog +%endif +%endif +exit 0 + +%post api +/sbin/ldconfig + +%if ( 0%{!?_without_events:1} ) +%post events +%service_enable glustereventsd +%endif + +%if ( 0%{!?_without_server:1} ) +%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +%post ganesha +semanage boolean -m ganesha_use_fusefs 
--on
+exit 0
+%endif
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%post geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%selinux_set_booleans %{selinuxbooleans}
+%endif
+if [ $1 -ge 1 ]; then
+    %systemd_postun_with_restart glusterd
+fi
+exit 0
+%endif
+
+%post libs
+/sbin/ldconfig
+
+%if ( 0%{!?_without_server:1} )
+%post server
+# Legacy server
+%service_enable glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%service_enable glusterfsd
+%endif
+# ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7.
+# While upgrading the glusterfs-server package from GlusterFS version <= 3.6 to
+# GlusterFS version 3.7, ".cmd_log_history" should be renamed to
+# "cmd_history.log" to retain cli command history contents.
+if [ -f %{_localstatedir}/log/glusterfs/.cmd_log_history ]; then
+    mv %{_localstatedir}/log/glusterfs/.cmd_log_history \
+       %{_localstatedir}/log/glusterfs/cmd_history.log
+fi
+
+# Genuine Fedora (and EPEL) builds never put gluster files in /etc; if
+# there are any files in /etc from a prior gluster.org install, move them
+# to /var/lib. (N.B. Starting with 3.3.0 all gluster files are in /var/lib
+# in gluster.org RPMs.) Be careful to copy them on the off chance that
+# /etc and /var/lib are on separate file systems
+if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
+    mkdir -p %{_sharedstatedir}/glusterd
+    cp -a /etc/glusterd %{_sharedstatedir}/glusterd
+    rm -rf /etc/glusterd
+    ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
+fi
+
+# Rename old volfiles in an RPM-standard way. These aren't actually
+# considered package config files, so %%config doesn't work for them.
+if [ -d %{_sharedstatedir}/glusterd/vols ]; then
+    for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
+        newfile=${file}.rpmsave
+        echo "warning: ${file} saved as ${newfile}"
+        cp ${file} ${newfile}
+    done
+fi
+
+# make certain that there are no old libs around to bite us
+# BZ 834847
+if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then
+    rm -f /etc/ld.so.conf.d/glusterfs.conf
+    /sbin/ldconfig
+fi
+
+%if (0%{?_with_firewalld:1})
+    %firewalld_reload
+%endif
+
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%pre should be placed here and keep them sorted
+##
+%pre
+getent group gluster > /dev/null || groupadd -r gluster
+getent passwd gluster > /dev/null || useradd -r -g gluster -d %{_rundir}/gluster -s /sbin/nologin -c "GlusterFS daemons" gluster
+exit 0
+
+##-----------------------------------------------------------------------------
+## All %%preun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_events:1} )
+%preun events
+if [ $1 -eq 0 ]; then
+    if [ -f %glustereventsd_svcfile ]; then
+        %service_stop glustereventsd
+        %systemd_preun glustereventsd
+    fi
+fi
+exit 0
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%preun server
+if [ $1 -eq 0 ]; then
+    if [ -f %glusterfsd_svcfile ]; then
+        %service_stop glusterfsd
+    fi
+    %service_stop glusterd
+    if [ -f %glusterfsd_svcfile ]; then
+        %systemd_preun glusterfsd
+    fi
+    %systemd_preun glusterd
+fi
+if [ $1 -ge 1 ]; then
+    if [ -f %glusterfsd_svcfile ]; then
+        %systemd_postun_with_restart glusterfsd
+    fi
+    %systemd_postun_with_restart glusterd
+fi
+exit 0
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%postun should be placed here and keep them sorted
+##
+%postun
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog +%endif +%endif + +%if ( 0%{!?_without_server:1} ) +%postun server +%if (0%{?_with_firewalld:1}) + %firewalld_reload +%endif +exit 0 +%endif + +%if ( 0%{!?_without_server:1} ) +%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +%postun ganesha +semanage boolean -m ganesha_use_fusefs --off +exit 0 +%endif +%endif + +##----------------------------------------------------------------------------- +## All %%trigger should be placed here and keep them sorted +## +%if ( 0%{!?_without_server:1} ) +%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +%trigger ganesha -- selinux-policy-targeted +semanage boolean -m ganesha_use_fusefs --on +exit 0 +%endif +%endif + +##----------------------------------------------------------------------------- +## All %%triggerun should be placed here and keep them sorted +## +%if ( 0%{!?_without_server:1} ) +%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) ) +%triggerun ganesha -- selinux-policy-targeted +semanage boolean -m ganesha_use_fusefs --off +exit 0 +%endif +%endif + +##----------------------------------------------------------------------------- +## All %%files should be placed here and keep them grouped +## +%files +%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README.md THANKS COMMITMENT +%{_mandir}/man8/*gluster*.8* +%if ( 0%{!?_without_server:1} ) +%exclude %{_mandir}/man8/gluster.8* +%endif +%dir %{_localstatedir}/log/glusterfs +%if ( 0%{!?_without_rdma:1} ) +%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma* +%endif +%if 0%{?!_without_server:1} +%dir %{_datadir}/glusterfs +%dir %{_datadir}/glusterfs/scripts + %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh + %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh +%endif +%{_datadir}/glusterfs/scripts/identify-hangs.sh +%{_datadir}/glusterfs/scripts/collect-system-stats.sh +%{_datadir}/glusterfs/scripts/log_accounting.sh +# xlators that are needed on the client- and on the server-side +%dir %{_libdir}/glusterfs +%dir %{_libdir}/glusterfs/%{version}%{?prereltag} +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/auth + %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/addr.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/login.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport + %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/socket.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/error-gen.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/delay-gen.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/io-stats.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/sink.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/utime.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/namespace.so + 
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-client.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/worm.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cloudsync.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/meta.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-cache.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-threads.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/md-cache.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/open-behind.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/quick-read.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/read-ahead.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/readdir-ahead.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/stat-prefetch.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/write-behind.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/nl-cache.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system/posix-acl.so +%dir %attr(0775,gluster,gluster) %{_rundir}/gluster +%if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1} +%{_tmpfilesdir}/gluster.conf +%endif +%if ( 0%{?_without_extra_xlators:1} ) +%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so +%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so +%endif +%if ( 0%{?_without_regression_tests:1} ) +%exclude %{_datadir}/glusterfs/run-tests.sh +%exclude %{_datadir}/glusterfs/tests +%endif +%if 0%{?_without_server:1} +%if ( 0%{?_with_systemd:1} ) +%exclude %{_datadir}/glusterfs/scripts/control-cpu-load.sh +%exclude %{_datadir}/glusterfs/scripts/control-mem.sh +%endif +%endif + +%if ( 0%{?_without_server:1} || 0%{?rhel} < 7 ) +#exclude ganesha related files for rhel 6 and client builds +%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample +%exclude %{_libexecdir}/ganesha/* +%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/* +%if ( 0%{!?_without_server:1} ) +%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh +%endif +%endif + +%exclude %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh + +%if ( 0%{?_without_server:1} ) +%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol +%endif + +%files api +%exclude %{_libdir}/*.so +# libgfapi files +%{_libdir}/libgfapi.* +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api.so + +%files api-devel +%{_libdir}/pkgconfig/glusterfs-api.pc +%{_libdir}/libgfapi.so +%dir %{_includedir}/glusterfs +%dir %{_includedir}/glusterfs/api + %{_includedir}/glusterfs/api/* + +%files cli +%{_sbindir}/gluster +%{_mandir}/man8/gluster.8* +%{_sysconfdir}/bash_completion.d/gluster + +%files cloudsync-plugins +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins + %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so + +%files devel +%dir %{_includedir}/glusterfs + %{_includedir}/glusterfs/* 
+%exclude %{_includedir}/glusterfs/api +%exclude %{_libdir}/libgfapi.so +%{_libdir}/*.so +%if ( 0%{?_without_server:1} ) +%exclude %{_libdir}/pkgconfig/libgfchangelog.pc +%exclude %{_libdir}/libgfchangelog.so +%if ( 0%{!?_without_tiering:1} ) +%exclude %{_libdir}/pkgconfig/libgfdb.pc +%endif +%else +%{_libdir}/pkgconfig/libgfchangelog.pc +%if ( 0%{!?_without_tiering:1} ) +%{_libdir}/pkgconfig/libgfdb.pc +%endif +%endif + +%files client-xlators +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/*.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so + +%if ( 0%{!?_without_extra_xlators:1} ) +%files extra-xlators +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so +%endif + +%files fuse +# glusterfs is a symlink to glusterfsd, -server depends on -fuse. +%{_sbindir}/glusterfs +%{_sbindir}/glusterfsd +%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse.so +/sbin/mount.glusterfs +%if ( 0%{!?_without_fusermount:1} ) +%{_bindir}/fusermount-glusterfs +%endif + +%if ( 0%{!?_without_georeplication:1} ) +%files geo-replication +%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep + +%{_sbindir}/gfind_missing_files +%{_sbindir}/gluster-mountbroker +%dir %{_libexecdir}/glusterfs +%dir %{_libexecdir}/glusterfs/python +%dir %{_libexecdir}/glusterfs/python/syncdaemon + %{_libexecdir}/glusterfs/gsyncd + %{_libexecdir}/glusterfs/python/syncdaemon/* + %{_libexecdir}/glusterfs/gverify.sh + %{_libexecdir}/glusterfs/set_geo_rep_pem_keys.sh + %{_libexecdir}/glusterfs/peer_gsec_create + %{_libexecdir}/glusterfs/peer_mountbroker + %{_libexecdir}/glusterfs/peer_mountbroker.py* + %{_libexecdir}/glusterfs/gfind_missing_files + %{_libexecdir}/glusterfs/peer_georep-sshkey.py* +%{_sbindir}/gluster-georep-sshkey + + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication +%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post/S56glusterd-geo-rep-create-post.sh +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre + +%dir %{_datadir}/glusterfs +%dir %{_datadir}/glusterfs/scripts + %{_datadir}/glusterfs/scripts/get-gfid.sh + %{_datadir}/glusterfs/scripts/slave-upgrade.sh + %{_datadir}/glusterfs/scripts/gsync-upgrade.sh + %{_datadir}/glusterfs/scripts/generate-gfid-file.sh + %{_datadir}/glusterfs/scripts/gsync-sync-gfid + %{_datadir}/glusterfs/scripts/schedule_georep.py* +%endif + +%files libs +%{_libdir}/*.so.* +%exclude %{_libdir}/libgfapi.* +%if ( 0%{!?_without_tiering:1} ) +# libgfdb is only needed server-side +%exclude %{_libdir}/libgfdb.* +%endif + +%files -n python%{_pythonver}-gluster +# introducing glusterfs module in site 
packages. +# so that all other gluster submodules can reside in the same namespace. +%if ( %{_usepython3} ) +%dir %{python3_sitelib}/gluster + %{python3_sitelib}/gluster/__init__.* + %{python3_sitelib}/gluster/__pycache__ + %{python3_sitelib}/gluster/cliutils +%else +%dir %{python2_sitelib}/gluster + %{python2_sitelib}/gluster/__init__.* + %{python2_sitelib}/gluster/cliutils +%endif + +%if ( 0%{!?_without_rdma:1} ) +%files rdma +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport + %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma* +%endif + +%if ( 0%{!?_without_regression_tests:1} ) +%files regression-tests +%dir %{_datadir}/glusterfs + %{_datadir}/glusterfs/run-tests.sh + %{_datadir}/glusterfs/tests +%exclude %{_datadir}/glusterfs/tests/vagrant +%endif + +%if ( 0%{!?_without_server:1} && 0%{?rhel} > 6 ) +%files ganesha +%dir %{_libexecdir}/ganesha +%{_sysconfdir}/ganesha/ganesha-ha.conf.sample +%{_libexecdir}/ganesha/* +%{_prefix}/lib/ocf/resource.d/heartbeat/* +%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh +%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf +%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha +%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf +%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf +%endif + +%if ( 0%{!?_without_ocf:1} ) +%files resource-agents +# /usr/lib is the standard for OCF, also on x86_64 +%{_prefix}/lib/ocf/resource.d/glusterfs +%endif + +%if ( 0%{!?_without_server:1} ) +%files server +%doc extras/clear_xattrs.sh +%{_datadir}/glusterfs/scripts/xattr_analysis.py* +%{_datadir}/glusterfs/scripts/quota_fsck.py* +# sysconf +%config(noreplace) %{_sysconfdir}/glusterfs +%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol +%exclude %{_sysconfdir}/glusterfs/eventsconfig.json +%config(noreplace) %{_sysconfdir}/sysconfig/glusterd +%if ( 0%{_for_fedora_koji_builds} ) +%config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd +%endif + +# init files +%glusterd_svcfile +%if ( 0%{_for_fedora_koji_builds} ) +%glusterfsd_svcfile +%endif +%if ( 0%{?_with_systemd:1} ) +%glusterfssharedstorage_svcfile +%endif + +# binaries +%{_sbindir}/glusterd +%{_sbindir}/glfsheal +%{_sbindir}/gf_attach +%{_sbindir}/gluster-setgfid2path +# {_sbindir}/glusterfsd is the actual binary, but glusterfs (client) is a +# symlink. The binary itself (and symlink) are part of the glusterfs-fuse +# package, because glusterfs-server depends on that anyway. 
+ +# Manpages +%{_mandir}/man8/gluster-setgfid2path.8* + +# xlators +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so +%if ( 0%{!?_without_tiering:1} ) + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so + %{_libdir}/libgfdb.so.* +%endif + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix* + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota* + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs* +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so +%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance + %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so + +# snap_scheduler +%{_sbindir}/snap_scheduler.py +%{_sbindir}/gcron.py +%{_sbindir}/conf.py + +# /var/lib/glusterd, e.g. hookscripts, etc. 
+%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info +%ghost %attr(0600,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/options + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/virt + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/metadata-cache + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt + %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1 + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/pre + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post + %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/post +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/pre + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S30samba-set.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S32gluster_enable_shared_storage.sh +%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/pre + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start + %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh + %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh +%ghost %dir %attr(0755,-,-) 
%{_sharedstatedir}/glusterd/hooks/1/start/pre
+       %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+       %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+                            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh
+                            %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+%ghost      %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+%ghost      %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/snaps
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/ss_brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/vols
+
+# Extra utility scripts
+%dir %{_libexecdir}/glusterfs
+     %{_datadir}/glusterfs/release
+%dir %{_datadir}/glusterfs/scripts
+     %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+%if ( 0%{?_with_systemd:1} )
+     %{_libexecdir}/glusterfs/mount-shared-storage.sh
+     %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+     %{_datadir}/glusterfs/scripts/control-mem.sh
+%endif
+
+# Incremental API (glusterfind)
+     %{_libexecdir}/glusterfs/glusterfind
+%{_bindir}/glusterfind
+     %{_libexecdir}/glusterfs/peer_add_secret_pub
+
+%if ( 0%{?_with_firewalld:1} )
+%{_prefix}/lib/firewalld/services/glusterfs.xml
+%endif
+# end of server files
+%endif
+
+# Events
+%if ( 0%{!?_without_events:1} )
+%files events
+%config(noreplace) %{_sysconfdir}/glusterfs/eventsconfig.json
+%dir %{_sharedstatedir}/glusterd
+%dir %{_sharedstatedir}/glusterd/events
+%dir %{_libexecdir}/glusterfs
+     %{_libexecdir}/glusterfs/gfevents
+     %{_libexecdir}/glusterfs/peer_eventsapi.py*
+%{_sbindir}/glustereventsd
+%{_sbindir}/gluster-eventsapi
+%{_datadir}/glusterfs/scripts/eventsdash.py*
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/glustereventsd.service
+%else
+%{_sysconfdir}/init.d/glustereventsd
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%pretrans should be placed here and keep them sorted
+##
+%if 0%{!?_without_server:1}
+%pretrans -p <lua>
+if not posix.access("/bin/bash", "x") then
+    -- initial installation, no shell, no running glusterfsd
+    return 0
+end
+
+-- TODO: move this completely to a lua script
+-- For now, we write a temporary bash script and execute that.
+
+script = [[#!/bin/sh
+pidof -c -o %PPID -x glusterfsd &>/dev/null
+
+if [ $? -eq 0 ]; then
+    pushd . > /dev/null 2>&1
+    for volume in /var/lib/glusterd/vols/*; do cd $volume;
+        vol_type=`grep '^type=' info | awk -F'=' '{print $2}'`
+        volume_started=`grep '^status=' info | awk -F'=' '{print $2}'`
+        if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then
+            echo "ERROR: Distribute volumes detected. In-service rolling upgrade requires distribute volume(s) to be stopped."
+            echo "ERROR: Please stop distribute volume(s) before proceeding... exiting!"
+            exit 1;
+        fi
+    done
+
+    popd > /dev/null 2>&1
+    echo "WARNING: Updating glusterfs requires its processes to be killed. This action does NOT incur downtime."
+    echo "WARNING: Ensure to wait for the upgraded server to finish healing before proceeding."
+ echo "WARNING: Refer upgrade section of install guide for more details" + echo "Please run # service glusterd stop; pkill glusterfs; pkill glusterfsd; pkill gsyncd.py;" + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + + +%pretrans api -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + + +%pretrans api-devel -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + + +%pretrans cli -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + +%pretrans client-xlators -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . 
> /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + +%pretrans fuse -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + + +%if ( 0%{!?_without_georeplication:1} ) +%pretrans geo-replication -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end +%endif + + + +%pretrans libs -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + + + +%if ( 0%{!?_without_rdma:1} ) +%pretrans rdma -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . 
> /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end +%endif + + + +%if ( 0%{!?_without_ocf:1} ) +%pretrans resource-agents -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end +%endif + + + +%pretrans server -p <lua> +if not posix.access("/bin/bash", "x") then + -- initial installation, no shell, no running glusterfsd + return 0 +end + +-- TODO: move this completely to a lua script +-- For now, we write a temporary bash script and execute that. + +script = [[#!/bin/sh +pidof -c -o %PPID -x glusterfsd &>/dev/null + +if [ $? -eq 0 ]; then + pushd . > /dev/null 2>&1 + for volume in /var/lib/glusterd/vols/*; do cd $volume; + vol_type=`grep '^type=' info | awk -F'=' '{print $2}'` + volume_started=`grep '^status=' info | awk -F'=' '{print $2}'` + if [ $vol_type -eq 0 ] && [ $volume_started -eq 1 ] ; then + exit 1; + fi + done + + popd > /dev/null 2>&1 + exit 1; +fi +]] + +ok, how, val = os.execute(script) +rc = val or ok +if not (rc == 0) then + error("Detected running glusterfs processes", rc) +end + +%posttrans server +pidof -c -o %PPID -x glusterd &> /dev/null +if [ $? -eq 0 ]; then + kill -9 `pgrep -f gsyncd.py` &> /dev/null + + killall --wait -SIGTERM glusterd &> /dev/null + + if [ "$?" != "0" ]; then + echo "killall failed while killing glusterd" + fi + + glusterd --xlator-option *.upgrade=on -N + + #Cleaning leftover glusterd socket file which is created by glusterd in + #rpm_script_t context. + rm -rf /var/run/glusterd.socket + + # glusterd _was_ running, we killed it, it exited after *.upgrade=on, + # so start it again + %service_start glusterd +else + glusterd --xlator-option *.upgrade=on -N + + #Cleaning leftover glusterd socket file which is created by glusterd in + #rpm_script_t context. 
+    rm -rf /var/run/glusterd.socket
+fi
+
+%endif
+
+%changelog
+* Mon Aug 09 2021 Mohan Boddu <mboddu@redhat.com> - 6.0-57.4
+- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
+  Related: rhbz#1991688
+
+* Sun Aug 1 2021 Tamar Shacked <tshacked@redhat.com> - 6.0-56.4
+- remove unneeded file with ambiguous licence
+- fixes bug bz#1939340
+
+* Mon Jul 26 2021 Tamar Shacked <tshacked@redhat.com> - 6.0-56.3
+- Rebase with latest RHGS-3.5.4
+- Fix changelog chronological order by removing unneeded changelogs
+- fixes bug bz#1939340
+
+* Thu May 06 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.2
+- fixes bugs bz#1953901
+
+* Thu Apr 22 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56.1
+- fixes bugs bz#1927235
+
+* Wed Apr 14 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-56
+- fixes bugs bz#1948547
+
+* Fri Mar 19 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-55
+- fixes bugs bz#1939372
+
+* Wed Mar 03 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-54
+- fixes bugs bz#1832306 bz#1911292 bz#1924044
+
+* Thu Feb 11 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-53
+- fixes bugs bz#1224906 bz#1691320 bz#1719171 bz#1814744 bz#1865796
+
+* Thu Jan 28 2021 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-52
+- fixes bugs bz#1600459 bz#1719171 bz#1830713 bz#1856574
+
+* Mon Dec 28 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-51
+- fixes bugs bz#1640148 bz#1856574 bz#1910119
+
+* Tue Dec 15 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-50
+- fixes bugs bz#1224906 bz#1412494 bz#1612973 bz#1663821 bz#1691320
+  bz#1726673 bz#1749304 bz#1752739 bz#1779238 bz#1813866 bz#1814744 bz#1821599
+  bz#1832306 bz#1835229 bz#1842449 bz#1865796 bz#1878077 bz#1882923 bz#1885966
+  bz#1890506 bz#1896425 bz#1898776 bz#1898777 bz#1898778 bz#1898781 bz#1898784
+  bz#1903468
+
+* Wed Nov 25 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-49
+- fixes bugs bz#1286171
+
+* Tue Nov 10 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-48
+- fixes bugs bz#1895301
+
+* Thu Nov 05 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-47
+- fixes bugs bz#1286171 bz#1821743 bz#1837926
+
+* Wed Oct 21 2020 Gluster Jenkins <dkhandel+glusterjenkins@redhat.com> - 6.0-46
+- fixes bugs bz#1873469 bz#1881823
+
+* Wed Sep 09 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-45
+- fixes bugs bz#1785714
+
+* Thu Sep 03 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-44
+- fixes bugs bz#1460657
+
+* Thu Sep 03 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-43
+- fixes bugs bz#1460657
+
+* Wed Sep 02 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-42
+- fixes bugs bz#1785714
+
+* Tue Aug 25 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-41
+- fixes bugs bz#1785714 bz#1851424 bz#1851989 bz#1852736 bz#1853189 bz#1855966
+
+* Tue Jul 21 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-40
+- fixes bugs bz#1812789 bz#1844359 bz#1847081 bz#1854165
+
+* Wed Jun 17 2020 Deepshikha Khandelwal <dkhandel@redhat.com> - 6.0-39
+- fixes bugs bz#1844359 bz#1845064
+
+* Wed Jun 10 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-38
+- fixes bugs bz#1234220 bz#1286171 bz#1487177 bz#1524457 bz#1640573
+  bz#1663557 bz#1667954 bz#1683602 bz#1686897 bz#1721355 bz#1748865 bz#1750211
+  bz#1754391 bz#1759875 bz#1761531 bz#1761932 bz#1763124 bz#1763129 bz#1764091
+  bz#1775637 bz#1776901 bz#1781550 bz#1781649 bz#1781710 bz#1783232 bz#1784211
+ 
bz#1784415 bz#1786516 bz#1786681 bz#1787294 bz#1787310 bz#1787331 bz#1787994 + bz#1790336 bz#1792873 bz#1794663 bz#1796814 bz#1804164 bz#1810924 bz#1815434 + bz#1836099 bz#1837467 bz#1837926 bz#1838479 bz#1839137 bz#1844359 + +* Fri May 29 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-37 +- fixes bugs bz#1840794 + +* Wed May 27 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-36 +- fixes bugs bz#1812789 bz#1823423 + +* Fri May 22 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-35 +- fixes bugs bz#1810516 bz#1830713 bz#1836233 + +* Sun May 17 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-34 +- fixes bugs bz#1802013 bz#1823706 bz#1825177 bz#1830713 bz#1831403 bz#1833017 + +* Wed Apr 29 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-33 +- fixes bugs bz#1812789 bz#1813917 bz#1823703 bz#1823706 bz#1825195 + +* Sat Apr 04 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-32 +- fixes bugs bz#1781543 bz#1812789 bz#1812824 bz#1817369 bz#1819059 + +* Tue Mar 17 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-31 +- fixes bugs bz#1802727 + +* Thu Feb 20 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-30.1 +- fixes bugs bz#1800703 + +* Sat Feb 01 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-30 +- fixes bugs bz#1775564 bz#1794153 + +* Thu Jan 23 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-29 +- fixes bugs bz#1793035 + +* Tue Jan 14 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-28 +- fixes bugs bz#1789447 + +* Mon Jan 13 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-27 +- fixes bugs bz#1789447 + +* Fri Jan 10 2020 Rinku Kothiya <rkothiya@redhat.com> - 6.0-26 +- fixes bugs bz#1763208 bz#1788656 + +* Mon Dec 23 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-25 +- fixes bugs bz#1686800 bz#1763208 bz#1779696 bz#1781444 bz#1782162 + +* Thu Nov 28 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-24 +- fixes bugs bz#1768786 + +* Thu Nov 21 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-23 +- fixes bugs bz#1344758 bz#1599802 bz#1685406 bz#1686800 bz#1724021 + bz#1726058 bz#1727755 bz#1731513 bz#1741193 bz#1758923 bz#1761326 bz#1761486 + bz#1762180 bz#1764095 bz#1766640 + +* Thu Nov 14 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-22 +- fixes bugs bz#1771524 bz#1771614 + +* Fri Oct 25 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-21 +- fixes bugs bz#1765555 + +* Wed Oct 23 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-20 +- fixes bugs bz#1719171 bz#1763412 bz#1764202 + +* Thu Oct 17 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-19 +- fixes bugs bz#1760939 + +* Wed Oct 16 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-18 +- fixes bugs bz#1758432 + +* Fri Oct 11 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-17 +- fixes bugs bz#1704562 bz#1758618 bz#1760261 + +* Wed Oct 09 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-16 +- fixes bugs bz#1752713 bz#1756325 + +* Fri Sep 27 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-15 +- fixes bugs bz#1726000 bz#1731826 bz#1754407 bz#1754790 bz#1755227 + +* Fri Sep 20 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-14 +- fixes bugs bz#1719171 bz#1728673 bz#1731896 bz#1732443 bz#1733970 + bz#1745107 bz#1746027 bz#1748688 bz#1750241 bz#1572163 + +* Fri Aug 23 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-13 +- fixes bugs bz#1729915 bz#1732376 bz#1743611 bz#1743627 bz#1743634 bz#1744518 + +* Fri Aug 09 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-12 +- fixes bugs bz#1730914 bz#1731448 bz#1732770 bz#1732792 bz#1733531 + bz#1734305 bz#1734534 bz#1734734 bz#1735514 bz#1737705 bz#1732774 + bz#1732793 + +* Tue Aug 06 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 
6.0-11 +- fixes bugs bz#1733520 bz#1734423 + +* Fri Aug 02 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-10 +- fixes bugs bz#1713890 + +* Tue Jul 23 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-9 +- fixes bugs bz#1708064 bz#1708180 bz#1715422 bz#1720992 bz#1722757 + +* Tue Jul 16 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-8 +- fixes bugs bz#1698435 bz#1712591 bz#1715447 bz#1720488 bz#1722209 + bz#1722512 bz#1724089 bz#1726991 bz#1727785 bz#1729108 + +* Fri Jun 28 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-7 +- fixes bugs bz#1573077 bz#1600918 bz#1703423 bz#1704207 bz#1708064 + bz#1709301 bz#1713664 bz#1716760 bz#1717784 bz#1720163 bz#1720192 + bz#1720551 bz#1721351 bz#1721357 bz#1721477 bz#1722131 bz#1722331 + bz#1722509 bz#1722801 bz#1720248 + +* Fri Jun 14 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-6 +- fixes bugs bz#1668001 bz#1708043 bz#1708183 bz#1710701 + bz#1719640 bz#1720079 bz#1720248 bz#1720318 bz#1720461 + +* Tue Jun 11 2019 Sunil Kumar Acharya <sheggodu@redhat.com> - 6.0-5 +- fixes bugs bz#1573077 bz#1694595 bz#1703434 bz#1714536 bz#1714588 + bz#1715407 bz#1715438 bz#1705018 + +* Fri Jun 07 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-4 +- fixes bugs bz#1480907 bz#1702298 bz#1703455 bz#1704181 bz#1707246 + bz#1708067 bz#1708116 bz#1708121 bz#1709087 bz#1711249 bz#1711296 + bz#1714078 bz#1714124 bz#1716385 bz#1716626 bz#1716821 bz#1716865 bz#1717927 + +* Tue May 14 2019 Rinku Kothiya <rkothiya@redhat.com> - 6.0-3 +- fixes bugs bz#1583585 bz#1671862 bz#1702686 bz#1703434 bz#1703753 + bz#1703897 bz#1704562 bz#1704769 bz#1704851 bz#1706683 bz#1706776 bz#1706893 + +* Thu Apr 25 2019 Milind Changire <mchangir@redhat.com> - 6.0-2 +- fixes bugs bz#1471742 bz#1652461 bz#1671862 bz#1676495 bz#1691620 + bz#1696334 bz#1696903 bz#1697820 bz#1698436 bz#1698728 bz#1699709 bz#1699835 + bz#1702240 + +* Mon Apr 08 2019 Milind Changire <mchangir@redhat.com> - 6.0-1 +- rebase to upstream glusterfs at v6.0 +- fixes bugs bz#1493284 bz#1578703 bz#1600918 bz#1670415 bz#1691620 + bz#1693935 bz#1695057 +