From 56092f9ab16a7606e549e0aba84a78ed6de0d397 Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Wed, 24 Jun 2015 08:02:51 +0530 Subject: [PATCH 140/190] cluster/afr: Pick gfid from poststat during fresh lookup for read child calculation Backport of: http://review.gluster.org/11373 Change-Id: I669f7c7d02e1dbd276f3d4c9c7bd8c575517e1b7 BUG: 1223757 Signed-off-by: Krutika Dhananjay Reviewed-on: https://code.engineering.redhat.com/gerrit/51654 Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri --- libglusterfs/src/inode.c | 22 +++++++++ libglusterfs/src/inode.h | 3 + xlators/cluster/afr/src/afr-common.c | 68 +++++++++++++++++----------- xlators/cluster/afr/src/afr-dir-read.c | 7 +++- xlators/cluster/afr/src/afr-dir-write.c | 6 +- xlators/cluster/afr/src/afr-inode-write.c | 5 +- xlators/cluster/afr/src/afr-read-txn.c | 4 +- xlators/cluster/afr/src/afr.h | 19 ++++++--- 8 files changed, 93 insertions(+), 41 deletions(-) diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 7c54865..7d3215e 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -2126,6 +2126,28 @@ inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p) return ret; } +int +inode_is_linked (inode_t *inode) +{ + int ret = 0; + inode_table_t *table = NULL; + + if (!inode) { + gf_log_callingfn (THIS->name, GF_LOG_WARNING, + "inode not found"); + return 0; + } + + table = inode->table; + + pthread_mutex_lock (&table->lock); + { + ret = __is_inode_hashed (inode); + } + pthread_mutex_unlock (&table->lock); + + return ret; +} void inode_dump (inode_t *inode, char *prefix) diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index 5081559..474dc39 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -269,4 +269,7 @@ inode_table_set_lru_limit (inode_table_t *table, uint32_t lru_limit); void inode_ctx_merge (fd_t *fd, inode_t *inode, inode_t *linked_inode); +int +inode_is_linked (inode_t *inode); + #endif /* _INODE_H */ diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 410d31d..7150f0f 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -987,7 +987,7 @@ out: int -afr_hash_child (inode_t *inode, int32_t child_count, int hashmode) +afr_hash_child (afr_read_subvol_args_t *args, int32_t child_count, int hashmode) { uuid_t gfid_copy = {0,}; pid_t pid; @@ -996,11 +996,9 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode) return -1; } - if (inode) { - gf_uuid_copy (gfid_copy, inode->gfid); - } + gf_uuid_copy (gfid_copy, args->gfid); - if (hashmode > 1 && inode->ia_type != IA_IFDIR) { + if ((hashmode > 1) && (args->ia_type != IA_IFDIR)) { /* * Why getpid? Because it's one of the cheapest calls * available - faster than gethostname etc. - and returns a @@ -1021,32 +1019,41 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode) int afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this, - unsigned char *readable) + unsigned char *readable, + afr_read_subvol_args_t *args) { - afr_private_t *priv = NULL; - int read_subvol = -1; - int i = 0; + int i = 0; + int read_subvol = -1; + afr_private_t *priv = NULL; + afr_read_subvol_args_t local_args = {0,}; priv = this->private; /* first preference - explicitly specified or local subvolume */ if (priv->read_child >= 0 && readable[priv->read_child]) - return priv->read_child; + return priv->read_child; + + if (inode_is_linked (inode)) { + gf_uuid_copy (local_args.gfid, inode->gfid); + local_args.ia_type = inode->ia_type; + } else if (args) { + local_args = *args; + } /* second preference - use hashed mode */ - read_subvol = afr_hash_child (inode, priv->child_count, - priv->hash_mode); + read_subvol = afr_hash_child (&local_args, priv->child_count, + priv->hash_mode); if (read_subvol >= 0 && readable[read_subvol]) - return read_subvol; + return read_subvol; for (i = 0; i < priv->child_count; i++) { - if (readable[i]) - return i; + if (readable[i]) + return i; } - /* no readable subvolumes, either split brain or all subvols down */ + /* no readable subvolumes, either split brain or all subvols down */ - return -1; + return -1; } @@ -1069,7 +1076,8 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, int afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, - int *event_p, afr_transaction_type type) + int *event_p, afr_transaction_type type, + afr_read_subvol_args_t *args) { afr_private_t *priv = NULL; unsigned char *data_readable = NULL; @@ -1096,10 +1104,10 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, if (AFR_COUNT (intersection, priv->child_count) > 0) subvol = afr_read_subvol_select_by_policy (inode, this, - intersection); + intersection, args); else subvol = afr_read_subvol_select_by_policy (inode, this, - readable); + readable, args); if (subvol_p) *subvol_p = subvol; if (event_p) @@ -1413,7 +1421,8 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent, priv = this->private; if (parent) - par_read_subvol = afr_data_subvol_get (parent, this, 0, 0); + par_read_subvol = afr_data_subvol_get (parent, this, 0, 0, + NULL); for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) @@ -1462,6 +1471,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) gf_boolean_t can_interpret = _gf_true; inode_t *parent = NULL; int spb_choice = -1; + ia_type_t ia_type = IA_INVAL; + afr_read_subvol_args_t args = {0,}; priv = this->private; local = frame->local; @@ -1509,6 +1520,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) if (read_subvol == -1 || !readable[read_subvol]) { read_subvol = i; gf_uuid_copy (read_gfid, replies[i].poststat.ia_gfid); + ia_type = replies[i].poststat.ia_type; local->op_ret = 0; } } @@ -1554,14 +1566,16 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) a response from all the UP subvolumes and all of them resolved to the same GFID */ + gf_uuid_copy (args.gfid, read_gfid); + args.ia_type = ia_type; if (afr_replies_interpret (frame, this, local->inode)) { read_subvol = afr_data_subvol_get (local->inode, this, - 0, 0); + 0, 0, &args); afr_inode_read_subvol_reset (local->inode, this); goto cant_interpret; } else { read_subvol = afr_data_subvol_get (local->inode, this, - 0, 0); + 0, 0, &args); } } else { cant_interpret: @@ -1979,7 +1993,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this) afr_replies_interpret (frame, this, local->inode); - read_subvol = afr_data_subvol_get (local->inode, this, 0, 0); + read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL); if (read_subvol == -1) { gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s", local->loc.path); @@ -2142,7 +2156,7 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req } afr_read_subvol_get (loc->inode, this, NULL, &event, - AFR_DATA_TRANSACTION); + AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) afr_inode_refresh (frame, this, loc->inode, afr_discover_do); @@ -2288,7 +2302,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) } afr_read_subvol_get (loc->parent, this, NULL, &event, - AFR_DATA_TRANSACTION); + AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) afr_inode_refresh (frame, this, loc->parent, afr_lookup_do); @@ -2608,7 +2622,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; - read_subvol = afr_data_subvol_get (local->inode, this, 0, 0); + read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL); LOCK (&frame->lock); { diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 984ed9c..11f583e 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -153,7 +153,12 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol) if (!priv->consistent_metadata) return 0; - entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0); + /* For an inode fetched through readdirp which is yet to be linked, + * inode ctx would not be initialised (yet). So this function returns + * -1 above due to gen being 0, which is why it is OK to pass NULL for + * read_subvol_args here. + */ + entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0, NULL); if (entry_read_subvol != par_read_subvol) return -1; diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 8a2c0e4..2891f36 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -95,14 +95,14 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) if (local->inode) { afr_replies_interpret (frame, this, local->inode); inode_read_subvol = afr_data_subvol_get (local->inode, this, - NULL, NULL); + NULL, NULL, NULL); } if (local->parent) parent_read_subvol = afr_data_subvol_get (local->parent, this, - NULL, NULL); + NULL, NULL, NULL); if (local->parent2) parent2_read_subvol = afr_data_subvol_get (local->parent2, this, - NULL, NULL); + NULL, NULL, NULL); local->op_ret = -1; local->op_errno = afr_final_errno (local, priv); diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index ecd2b9d..5d32927 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -53,10 +53,11 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) if (local->inode) { if (local->transaction.type == AFR_METADATA_TRANSACTION) read_subvol = afr_metadata_subvol_get (local->inode, this, - NULL, NULL); + NULL, NULL, + NULL); else read_subvol = afr_data_subvol_get (local->inode, this, - NULL, NULL); + NULL, NULL, NULL); } local->op_ret = -1; diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 0ec1d91..6121108 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -90,7 +90,7 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) memcpy (local->readable, local->child_up, priv->child_count); read_subvol = afr_read_subvol_select_by_policy (inode, this, - local->readable); + local->readable, NULL); if (read_subvol == -1) AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); @@ -232,7 +232,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, goto refresh; read_subvol = afr_read_subvol_select_by_policy (inode, this, - local->readable); + local->readable, NULL); if (read_subvol < 0 || read_subvol > priv->child_count) { gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 855d3a3..e6e7f3e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -755,6 +755,11 @@ typedef struct afr_spbc_timeout { int spb_child_index; } afr_spbc_timeout_t; +typedef struct afr_read_subvol_args { + ia_type_t ia_type; + uuid_t gfid; +} afr_read_subvol_args_t; + /* did a call fail due to a child failing? */ #define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \ ((op_errno == ENOTCONN) || \ @@ -787,7 +792,8 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this); int afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this, - unsigned char *readable); + unsigned char *readable, + afr_read_subvol_args_t *args); int afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, @@ -795,13 +801,14 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, int type); int afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, - int *event_p, afr_transaction_type type); + int *event_p, afr_transaction_type type, + afr_read_subvol_args_t *args); -#define afr_data_subvol_get(i, t, s, e) \ - afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION) +#define afr_data_subvol_get(i, t, s, e, a) \ + afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION, a) -#define afr_metadata_subvol_get(i, t, s, e) \ - afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION) +#define afr_metadata_subvol_get(i, t, s, e, a) \ + afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION, a) int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, -- 1.7.1