From ea96fcd832de0b49f0e050f535d22a500da1503a Mon Sep 17 00:00:00 2001 From: Xavi Hernandez Date: Thu, 3 Jun 2021 13:14:04 +0200 Subject: [PATCH 562/584] shard.c: Fix formatting A previous downstream change [1] had changed the formatting of the entire xlators/features/shard/src/shard.c. This patch reapplies the correct formatting. No other changes have been made. [1] https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/185716/ BUG: 1925425 Change-Id: Ie655ddaaa26aa884878e66bc0d9ce1f021f6a85f Signed-off-by: Xavi Hernandez Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244956 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- xlators/features/shard/src/shard.c | 11701 ++++++++++++++++++----------------- 1 file changed, 6084 insertions(+), 5617 deletions(-) diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 099b062..c5cc224 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -16,5813 +16,6226 @@ #include #include -static gf_boolean_t __is_shard_dir(uuid_t gfid) { - shard_priv_t *priv = THIS->private; +static gf_boolean_t +__is_shard_dir(uuid_t gfid) +{ + shard_priv_t *priv = THIS->private; - if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) - return _gf_true; + if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) + return _gf_true; - return _gf_false; + return _gf_false; } -static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { - if (frame->root->pid == GF_CLIENT_PID_GSYNCD && - (__is_shard_dir(loc->pargfid) || - (loc->parent && __is_shard_dir(loc->parent->gfid)))) - return _gf_true; +static gf_boolean_t +__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) +{ + if (frame->root->pid == GF_CLIENT_PID_GSYNCD && + (__is_shard_dir(loc->pargfid) || + (loc->parent && __is_shard_dir(loc->parent->gfid)))) + return _gf_true; - return _gf_false; + return _gf_false; } -void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { - char gfid_str[GF_UUID_BUF_SIZE] = { - 0, - }; +void +shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) +{ + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; - gf_uuid_unparse(gfid, gfid_str); - snprintf(buf, len, "%s.%d", gfid_str, block_num); + gf_uuid_unparse(gfid, gfid_str); + snprintf(buf, len, "%s.%d", gfid_str, block_num); } -void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, - size_t len) { - char gfid_str[GF_UUID_BUF_SIZE] = { - 0, - }; +void +shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) +{ + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; - gf_uuid_unparse(gfid, gfid_str); - snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + gf_uuid_unparse(gfid, gfid_str); + snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); } -int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, - shard_inode_ctx_t **ctx) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx_p = NULL; +int +__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx_p = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret == 0) { - *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - return ret; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret == 0) { + *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + return ret; + } - ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); - if (!ctx_p) - return ret; + ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); + if (!ctx_p) + return ret; - INIT_LIST_HEAD(&ctx_p->ilist); - INIT_LIST_HEAD(&ctx_p->to_fsync_list); + INIT_LIST_HEAD(&ctx_p->ilist); + INIT_LIST_HEAD(&ctx_p->to_fsync_list); - ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); - if (ret < 0) { - GF_FREE(ctx_p); - return ret; - } + ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); + if (ret < 0) { + GF_FREE(ctx_p); + return ret; + } - *ctx = ctx_p; + *ctx = ctx_p; - return ret; + return ret; } -int shard_inode_ctx_get(inode_t *inode, xlator_t *this, - shard_inode_ctx_t **ctx) { - int ret = 0; +int +shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) +{ + int ret = 0; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_get(inode, this, ctx); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get(inode, this, ctx); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, - uint64_t block_size, int32_t valid) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - if (valid & SHARD_MASK_BLOCK_SIZE) - ctx->block_size = block_size; + if (valid & SHARD_MASK_BLOCK_SIZE) + ctx->block_size = block_size; - if (valid & SHARD_MASK_PROT) - ctx->stat.ia_prot = stbuf->ia_prot; + if (valid & SHARD_MASK_PROT) + ctx->stat.ia_prot = stbuf->ia_prot; - if (valid & SHARD_MASK_NLINK) - ctx->stat.ia_nlink = stbuf->ia_nlink; + if (valid & SHARD_MASK_NLINK) + ctx->stat.ia_nlink = stbuf->ia_nlink; - if (valid & SHARD_MASK_UID) - ctx->stat.ia_uid = stbuf->ia_uid; + if (valid & SHARD_MASK_UID) + ctx->stat.ia_uid = stbuf->ia_uid; - if (valid & SHARD_MASK_GID) - ctx->stat.ia_gid = stbuf->ia_gid; + if (valid & SHARD_MASK_GID) + ctx->stat.ia_gid = stbuf->ia_gid; - if (valid & SHARD_MASK_SIZE) - ctx->stat.ia_size = stbuf->ia_size; + if (valid & SHARD_MASK_SIZE) + ctx->stat.ia_size = stbuf->ia_size; - if (valid & SHARD_MASK_BLOCKS) - ctx->stat.ia_blocks = stbuf->ia_blocks; + if (valid & SHARD_MASK_BLOCKS) + ctx->stat.ia_blocks = stbuf->ia_blocks; - if (valid & SHARD_MASK_TIMES) { - SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, - stbuf->ia_mtime, stbuf->ia_mtime_nsec); - SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, - stbuf->ia_ctime, stbuf->ia_ctime_nsec); - SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, - stbuf->ia_atime, stbuf->ia_atime_nsec); - } + if (valid & SHARD_MASK_TIMES) { + SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, + stbuf->ia_mtime, stbuf->ia_mtime_nsec); + SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec); + SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, + stbuf->ia_atime, stbuf->ia_atime_nsec); + } - if (valid & SHARD_MASK_OTHERS) { - ctx->stat.ia_ino = stbuf->ia_ino; - gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); - ctx->stat.ia_dev = stbuf->ia_dev; - ctx->stat.ia_type = stbuf->ia_type; - ctx->stat.ia_rdev = stbuf->ia_rdev; - ctx->stat.ia_blksize = stbuf->ia_blksize; - } + if (valid & SHARD_MASK_OTHERS) { + ctx->stat.ia_ino = stbuf->ia_ino; + gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); + ctx->stat.ia_dev = stbuf->ia_dev; + ctx->stat.ia_type = stbuf->ia_type; + ctx->stat.ia_rdev = stbuf->ia_rdev; + ctx->stat.ia_blksize = stbuf->ia_blksize; + } - if (valid & SHARD_MASK_REFRESH_RESET) - ctx->refresh = _gf_false; + if (valid & SHARD_MASK_REFRESH_RESET) + ctx->refresh = _gf_false; - return 0; + return 0; } -int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, - uint64_t block_size, int32_t valid) { - int ret = -1; +int +shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - ctx->refresh = _gf_true; + ctx->refresh = _gf_true; - return 0; + return 0; } -int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { - int ret = -1; +int +shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_set_refresh_flag(inode, this); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - ctx->refreshed = _gf_true; - return 0; + ctx->refreshed = _gf_true; + return 0; } -int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { - int ret = -1; +int +shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, - inode_t *shard_inode) { - int ret = -1; - shard_inode_ctx_t *base_ictx = NULL; - shard_inode_ctx_t *shard_ictx = NULL; +int +__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, + inode_t *shard_inode) +{ + int ret = -1; + shard_inode_ctx_t *base_ictx = NULL; + shard_inode_ctx_t *shard_ictx = NULL; - ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); + if (ret) + return ret; - ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); + if (ret) + return ret; - if (shard_ictx->fsync_needed) { - shard_ictx->fsync_needed++; - return 1; - } + if (shard_ictx->fsync_needed) { + shard_ictx->fsync_needed++; + return 1; + } - list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); - shard_ictx->inode = shard_inode; - shard_ictx->fsync_needed++; - base_ictx->fsync_count++; - shard_ictx->base_inode = base_inode; + list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); + shard_ictx->inode = shard_inode; + shard_ictx->fsync_needed++; + base_ictx->fsync_count++; + shard_ictx->base_inode = base_inode; - return 0; + return 0; } -int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, - inode_t *shard_inode) { - int ret = -1; +int +shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, + inode_t *shard_inode) +{ + int ret = -1; - /* This ref acts as a refkeepr on the base inode. We - * need to keep this inode alive as it holds the head - * of the to_fsync_list. - */ - inode_ref(base_inode); - inode_ref(shard_inode); + /* This ref acts as a refkeepr on the base inode. We + * need to keep this inode alive as it holds the head + * of the to_fsync_list. + */ + inode_ref(base_inode); + inode_ref(shard_inode); - LOCK(&base_inode->lock); - LOCK(&shard_inode->lock); - { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } - UNLOCK(&shard_inode->lock); - UNLOCK(&base_inode->lock); + LOCK(&base_inode->lock); + LOCK(&shard_inode->lock); + { + ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, + shard_inode); + } + UNLOCK(&shard_inode->lock); + UNLOCK(&base_inode->lock); - /* Unref the base inode corresponding to the ref above, if the shard is - * found to be already part of the fsync list. - */ - if (ret != 0) { - inode_unref(base_inode); - inode_unref(shard_inode); - } - return ret; + /* Unref the base inode corresponding to the ref above, if the shard is + * found to be already part of the fsync list. + */ + if (ret != 0) { + inode_unref(base_inode); + inode_unref(shard_inode); + } + return ret; } -gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +gf_boolean_t +__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - /* If inode ctx get fails, better to err on the side of caution and - * try again? Unless the failure is due to mem-allocation. - */ - if (ret) - return _gf_true; + ret = __shard_inode_ctx_get(inode, this, &ctx); + /* If inode ctx get fails, better to err on the side of caution and + * try again? Unless the failure is due to mem-allocation. + */ + if (ret) + return _gf_true; - return !ctx->refreshed; + return !ctx->refreshed; } -gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { - gf_boolean_t flag = _gf_false; +gf_boolean_t +shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) +{ + gf_boolean_t flag = _gf_false; - LOCK(&inode->lock); - { flag = __shard_inode_ctx_needs_lookup(inode, this); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + flag = __shard_inode_ctx_needs_lookup(inode, this); + } + UNLOCK(&inode->lock); - return flag; + return flag; } -int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, - struct iatt *stbuf) { - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +{ + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - if ((stbuf->ia_size != ctx->stat.ia_size) || - (stbuf->ia_blocks != ctx->stat.ia_blocks)) - ctx->refresh = _gf_true; + if ((stbuf->ia_size != ctx->stat.ia_size) || + (stbuf->ia_blocks != ctx->stat.ia_blocks)) + ctx->refresh = _gf_true; - return 0; + return 0; } -int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, - struct iatt *stbuf) { - int ret = -1; +int +shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_invalidate(inode, this, stbuf); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, - uint64_t *block_size) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, + uint64_t *block_size) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - *block_size = ctx->block_size; + *block_size = ctx->block_size; - return 0; + return 0; } -int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, - uint64_t *block_size) { - int ret = -1; +int +shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, + uint64_t *block_size) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get_block_size(inode, this, block_size); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, - int *fsync_count) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, + int *fsync_count) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - *fsync_count = ctx->fsync_needed; + *fsync_count = ctx->fsync_needed; - return 0; + return 0; } -int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, - int *fsync_count) { - int ret = -1; +int +shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, + int *fsync_count) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_out) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, + shard_inode_ctx_t *ctx_out) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); - return 0; + memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); + return 0; } -int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_out) { - int ret = -1; +int +shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, + shard_inode_ctx_t *ctx_out) +{ + int ret = -1; - LOCK(&inode->lock); - { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_get_all(inode, this, ctx_out); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, - struct iatt *buf, - gf_boolean_t *need_refresh) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int +__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, + struct iatt *buf, + gf_boolean_t *need_refresh) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - if (ctx->refresh == _gf_false) - *buf = ctx->stat; - else - *need_refresh = _gf_true; + if (ctx->refresh == _gf_false) + *buf = ctx->stat; + else + *need_refresh = _gf_true; - return 0; + return 0; } -int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, - struct iatt *buf, - gf_boolean_t *need_refresh) { - int ret = -1; +int +shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, + struct iatt *buf, + gf_boolean_t *need_refresh) +{ + int ret = -1; - LOCK(&inode->lock); - { - ret = - __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, + need_refresh); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -void shard_local_wipe(shard_local_t *local) { - int i = 0; - int count = 0; +void +shard_local_wipe(shard_local_t *local) +{ + int i = 0; + int count = 0; + + count = local->num_blocks; + + syncbarrier_destroy(&local->barrier); + loc_wipe(&local->loc); + loc_wipe(&local->dot_shard_loc); + loc_wipe(&local->dot_shard_rm_loc); + loc_wipe(&local->loc2); + loc_wipe(&local->tmp_loc); + loc_wipe(&local->int_inodelk.loc); + loc_wipe(&local->int_entrylk.loc); + loc_wipe(&local->newloc); + + if (local->name) + GF_FREE(local->name); + + if (local->int_entrylk.basename) + GF_FREE(local->int_entrylk.basename); + if (local->fd) + fd_unref(local->fd); - count = local->num_blocks; + if (local->xattr_req) + dict_unref(local->xattr_req); + if (local->xattr_rsp) + dict_unref(local->xattr_rsp); - syncbarrier_destroy(&local->barrier); - loc_wipe(&local->loc); - loc_wipe(&local->dot_shard_loc); - loc_wipe(&local->dot_shard_rm_loc); - loc_wipe(&local->loc2); - loc_wipe(&local->tmp_loc); - loc_wipe(&local->int_inodelk.loc); - loc_wipe(&local->int_entrylk.loc); - loc_wipe(&local->newloc); + for (i = 0; i < count; i++) { + if (!local->inode_list) + break; - if (local->name) - GF_FREE(local->name); + if (local->inode_list[i]) + inode_unref(local->inode_list[i]); + } - if (local->int_entrylk.basename) - GF_FREE(local->int_entrylk.basename); - if (local->fd) - fd_unref(local->fd); + GF_FREE(local->inode_list); - if (local->xattr_req) - dict_unref(local->xattr_req); - if (local->xattr_rsp) - dict_unref(local->xattr_rsp); + GF_FREE(local->vector); + if (local->iobref) + iobref_unref(local->iobref); + if (local->list_inited) + gf_dirent_free(&local->entries_head); + if (local->inodelk_frame) + SHARD_STACK_DESTROY(local->inodelk_frame); + if (local->entrylk_frame) + SHARD_STACK_DESTROY(local->entrylk_frame); +} - for (i = 0; i < count; i++) { - if (!local->inode_list) - break; - - if (local->inode_list[i]) - inode_unref(local->inode_list[i]); - } - - GF_FREE(local->inode_list); - - GF_FREE(local->vector); - if (local->iobref) - iobref_unref(local->iobref); - if (local->list_inited) - gf_dirent_free(&local->entries_head); - if (local->inodelk_frame) - SHARD_STACK_DESTROY(local->inodelk_frame); - if (local->entrylk_frame) - SHARD_STACK_DESTROY(local->entrylk_frame); -} - -int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { - int ret = -1; - void *size_attr = NULL; - uint64_t size_array[4]; - - ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); - if (ret) { - gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, - SHARD_MSG_INTERNAL_XATTR_MISSING, - "Failed to " - "get " GF_XATTR_SHARD_FILE_SIZE " for %s", - uuid_utoa(stbuf->ia_gfid)); - return ret; - } +int +shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) +{ + int ret = -1; + void *size_attr = NULL; + uint64_t size_array[4]; + + ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, + SHARD_MSG_INTERNAL_XATTR_MISSING, + "Failed to " + "get " GF_XATTR_SHARD_FILE_SIZE " for %s", + uuid_utoa(stbuf->ia_gfid)); + return ret; + } - memcpy(size_array, size_attr, sizeof(size_array)); + memcpy(size_array, size_attr, sizeof(size_array)); - stbuf->ia_size = ntoh64(size_array[0]); - stbuf->ia_blocks = ntoh64(size_array[2]); + stbuf->ia_size = ntoh64(size_array[0]); + stbuf->ia_blocks = ntoh64(size_array[2]); - return 0; + return 0; } -int shard_call_count_return(call_frame_t *frame) { - int call_count = 0; - shard_local_t *local = NULL; +int +shard_call_count_return(call_frame_t *frame) +{ + int call_count = 0; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - LOCK(&frame->lock); - { call_count = --local->call_count; } - UNLOCK(&frame->lock); + LOCK(&frame->lock); + { + call_count = --local->call_count; + } + UNLOCK(&frame->lock); - return call_count; + return call_count; } -static char *shard_internal_dir_string(shard_internal_dir_type_t type) { - char *str = NULL; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - str = GF_SHARD_DIR; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - str = GF_SHARD_REMOVE_ME_DIR; - break; - default: - break; - } - return str; +static char * +shard_internal_dir_string(shard_internal_dir_type_t type) +{ + char *str = NULL; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + str = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + str = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; + } + return str; } -static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, - shard_internal_dir_type_t type) { - int ret = -1; - char *bname = NULL; - inode_t *parent = NULL; - loc_t *internal_dir_loc = NULL; - shard_priv_t *priv = NULL; +static int +shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) +{ + int ret = -1; + char *bname = NULL; + inode_t *parent = NULL; + loc_t *internal_dir_loc = NULL; + shard_priv_t *priv = NULL; - priv = this->private; - if (!local) - return -1; + priv = this->private; + if (!local) + return -1; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + internal_dir_loc = &local->dot_shard_loc; + bname = GF_SHARD_DIR; + parent = inode_ref(this->itable->root); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + internal_dir_loc = &local->dot_shard_rm_loc; + bname = GF_SHARD_REMOVE_ME_DIR; + parent = inode_ref(priv->dot_shard_inode); + break; + default: + break; + } - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - internal_dir_loc = &local->dot_shard_loc; - bname = GF_SHARD_DIR; - parent = inode_ref(this->itable->root); - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - internal_dir_loc = &local->dot_shard_rm_loc; - bname = GF_SHARD_REMOVE_ME_DIR; - parent = inode_ref(priv->dot_shard_inode); - break; - default: - break; - } - - internal_dir_loc->inode = inode_new(this->itable); - internal_dir_loc->parent = parent; - ret = inode_path(internal_dir_loc->parent, bname, - (char **)&internal_dir_loc->path); - if (ret < 0 || !(internal_dir_loc->inode)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", bname); - goto out; - } - - internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); - if (internal_dir_loc->name) - internal_dir_loc->name++; - - ret = 0; -out: - return ret; -} - -inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, - inode_t *base_inode, int block_num, - uuid_t gfid) { - char block_bname[256] = { - 0, - }; - inode_t *lru_inode = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *lru_inode_ctx = NULL; - shard_inode_ctx_t *lru_base_inode_ctx = NULL; - inode_t *fsync_inode = NULL; - inode_t *lru_base_inode = NULL; - gf_boolean_t do_fsync = _gf_false; - - priv = this->private; - - shard_inode_ctx_get(linked_inode, this, &ctx); - - if (list_empty(&ctx->ilist)) { - if (priv->inode_count + 1 <= priv->lru_limit) { - /* If this inode was linked here for the first time (indicated - * by empty list), and if there is still space in the priv list, - * add this ctx to the tail of the list. - */ - /* For as long as an inode is in lru list, we try to - * keep it alive by holding a ref on it. - */ - inode_ref(linked_inode); - if (base_inode) - gf_uuid_copy(ctx->base_gfid, base_inode->gfid); - else - gf_uuid_copy(ctx->base_gfid, gfid); - ctx->block_num = block_num; - list_add_tail(&ctx->ilist, &priv->ilist_head); - priv->inode_count++; - ctx->base_inode = inode_ref(base_inode); - } else { - /*If on the other hand there is no available slot for this inode - * in the list, delete the lru inode from the head of the list, - * unlink it. And in its place add this new inode into the list. - */ - lru_inode_ctx = - list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); - GF_ASSERT(lru_inode_ctx->block_num > 0); - lru_base_inode = lru_inode_ctx->base_inode; - list_del_init(&lru_inode_ctx->ilist); - lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); - /* If the lru inode was part of the pending-fsync list, - * the base inode needs to be unref'd, the lru inode - * deleted from fsync list and fsync'd in a new frame, - * and then unlinked in memory and forgotten. - */ - if (!lru_base_inode) - goto after_fsync_check; - LOCK(&lru_base_inode->lock); - LOCK(&lru_inode->lock); - { - if (!list_empty(&lru_inode_ctx->to_fsync_list)) { - list_del_init(&lru_inode_ctx->to_fsync_list); - lru_inode_ctx->fsync_needed = 0; - do_fsync = _gf_true; - __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); - lru_base_inode_ctx->fsync_count--; - } - } - UNLOCK(&lru_inode->lock); - UNLOCK(&lru_base_inode->lock); - - after_fsync_check: - if (!do_fsync) { - shard_make_block_bname(lru_inode_ctx->block_num, - lru_inode_ctx->base_gfid, block_bname, - sizeof(block_bname)); - /* The following unref corresponds to the ref held at - * the time the shard was added to the lru list. - */ - inode_unref(lru_inode); - inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); - inode_forget(lru_inode, 0); - } else { - /* The following unref corresponds to the ref - * held when the shard was added to fsync list. - */ - inode_unref(lru_inode); - fsync_inode = lru_inode; - if (lru_base_inode) - inode_unref(lru_base_inode); - } - /* The following unref corresponds to the ref - * held by inode_find() above. - */ - inode_unref(lru_inode); - - /* The following unref corresponds to the ref held on the base shard - * at the time of adding shard inode to lru list - */ - if (lru_base_inode) - inode_unref(lru_base_inode); - - /* For as long as an inode is in lru list, we try to - * keep it alive by holding a ref on it. - */ - inode_ref(linked_inode); - if (base_inode) - gf_uuid_copy(ctx->base_gfid, base_inode->gfid); - else - gf_uuid_copy(ctx->base_gfid, gfid); - ctx->block_num = block_num; - ctx->base_inode = inode_ref(base_inode); - list_add_tail(&ctx->ilist, &priv->ilist_head); - } - } else { - /* If this is not the first time this inode is being operated on, move - * it to the most recently used end of the list. - */ - list_move_tail(&ctx->ilist, &priv->ilist_head); - } - return fsync_inode; -} - -int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, - int32_t op_ret, int32_t op_errno) { - switch (fop) { - case GF_FOP_LOOKUP: - SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); - break; - case GF_FOP_STAT: - SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_FSTAT: - SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_TRUNCATE: - SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_FTRUNCATE: - SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_MKNOD: - SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL); - break; - case GF_FOP_LINK: - SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL); - break; - case GF_FOP_CREATE: - SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - break; - case GF_FOP_UNLINK: - SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_RENAME: - SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, - NULL, NULL); - break; - case GF_FOP_WRITE: - SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_FALLOCATE: - SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_ZEROFILL: - SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_DISCARD: - SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_READ: - SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, - NULL); - break; - case GF_FOP_FSYNC: - SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_REMOVEXATTR: - SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_FREMOVEXATTR: - SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_FGETXATTR: - SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_GETXATTR: - SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_FSETXATTR: - SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_SETXATTR: - SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_SETATTR: - SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_FSETATTR: - SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); - break; - case GF_FOP_SEEK: - SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); - break; - default: - gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", fop); - break; - } - return 0; -} - -int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, - call_frame_t *frame, - int32_t op_ret) { - shard_local_t *local = NULL; - - local = frame->local; - - switch (fop) { - case GF_FOP_WRITE: - SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - case GF_FOP_FALLOCATE: - SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - case GF_FOP_ZEROFILL: - SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - case GF_FOP_DISCARD: - SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - default: - gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", fop); - break; - } - return 0; -} - -int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) { - char block_bname[256] = { - 0, - }; - fd_t *anon_fd = cookie; - inode_t *shard_inode = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - - if (anon_fd == NULL || op_ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, - "fsync failed on shard"); - goto out; - } - shard_inode = anon_fd->inode; - - LOCK(&priv->lock); - LOCK(&shard_inode->lock); - { - __shard_inode_ctx_get(shard_inode, this, &ctx); - if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { - shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, - sizeof(block_bname)); - inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); - /* The following unref corresponds to the ref held by - * inode_link() at the time the shard was created or - * looked up - */ - inode_unref(shard_inode); - inode_forget(shard_inode, 0); - } - } - UNLOCK(&shard_inode->lock); - UNLOCK(&priv->lock); + internal_dir_loc->inode = inode_new(this->itable); + internal_dir_loc->parent = parent; + ret = inode_path(internal_dir_loc->parent, bname, + (char **)&internal_dir_loc->path); + if (ret < 0 || !(internal_dir_loc->inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", bname); + goto out; + } + + internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); + if (internal_dir_loc->name) + internal_dir_loc->name++; + ret = 0; out: - if (anon_fd) - fd_unref(anon_fd); - STACK_DESTROY(frame->root); - return 0; + return ret; } -int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { - fd_t *anon_fd = NULL; - call_frame_t *fsync_frame = NULL; - - fsync_frame = create_frame(this, this->ctx->pool); - if (!fsync_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create new frame " - "to fsync shard"); - return -1; - } - - anon_fd = fd_anonymous(inode); - if (!anon_fd) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create anon fd to" - " fsync shard"); - STACK_DESTROY(fsync_frame->root); - return -1; - } - - STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, - 1, NULL); - return 0; -} - -int shard_common_resolve_shards( - call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t post_res_handler) { - int i = -1; - uint32_t shard_idx_iter = 0; - char path[PATH_MAX] = { - 0, - }; - uuid_t gfid = { - 0, - }; - inode_t *inode = NULL; - inode_t *res_inode = NULL; - inode_t *fsync_inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - local->call_count = 0; - shard_idx_iter = local->first_block; - res_inode = local->resolver_base_inode; - if (res_inode) - gf_uuid_copy(gfid, res_inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); - - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; - - while (shard_idx_iter <= local->last_block) { - i++; - if (shard_idx_iter == 0) { - local->inode_list[i] = inode_ref(res_inode); - shard_idx_iter++; - continue; - } - - shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); - - inode = NULL; - inode = inode_resolve(this->itable, path); - if (inode) { - gf_msg_debug(this->name, 0, "Shard %d already " - "present. gfid=%s. Saving inode for future.", - shard_idx_iter, uuid_utoa(inode->gfid)); - local->inode_list[i] = inode; - /* Let the ref on the inodes that are already present - * in inode table still be held so that they don't get - * forgotten by the time the fop reaches the actual - * write stage. - */ - LOCK(&priv->lock); - { - fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, - shard_idx_iter, gfid); - } - UNLOCK(&priv->lock); - shard_idx_iter++; - if (fsync_inode) - shard_initiate_evicted_inode_fsync(this, fsync_inode); - continue; +inode_t * +__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + inode_t *base_inode, int block_num, + uuid_t gfid) +{ + char block_bname[256] = { + 0, + }; + inode_t *lru_inode = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *lru_inode_ctx = NULL; + shard_inode_ctx_t *lru_base_inode_ctx = NULL; + inode_t *fsync_inode = NULL; + inode_t *lru_base_inode = NULL; + gf_boolean_t do_fsync = _gf_false; + + priv = this->private; + + shard_inode_ctx_get(linked_inode, this, &ctx); + + if (list_empty(&ctx->ilist)) { + if (priv->inode_count + 1 <= priv->lru_limit) { + /* If this inode was linked here for the first time (indicated + * by empty list), and if there is still space in the priv list, + * add this ctx to the tail of the list. + */ + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. + */ + inode_ref(linked_inode); + if (base_inode) + gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + list_add_tail(&ctx->ilist, &priv->ilist_head); + priv->inode_count++; + ctx->base_inode = inode_ref(base_inode); + } else { + /*If on the other hand there is no available slot for this inode + * in the list, delete the lru inode from the head of the list, + * unlink it. And in its place add this new inode into the list. + */ + lru_inode_ctx = list_first_entry(&priv->ilist_head, + shard_inode_ctx_t, ilist); + GF_ASSERT(lru_inode_ctx->block_num > 0); + lru_base_inode = lru_inode_ctx->base_inode; + list_del_init(&lru_inode_ctx->ilist); + lru_inode = inode_find(linked_inode->table, + lru_inode_ctx->stat.ia_gfid); + /* If the lru inode was part of the pending-fsync list, + * the base inode needs to be unref'd, the lru inode + * deleted from fsync list and fsync'd in a new frame, + * and then unlinked in memory and forgotten. + */ + if (!lru_base_inode) + goto after_fsync_check; + LOCK(&lru_base_inode->lock); + LOCK(&lru_inode->lock); + { + if (!list_empty(&lru_inode_ctx->to_fsync_list)) { + list_del_init(&lru_inode_ctx->to_fsync_list); + lru_inode_ctx->fsync_needed = 0; + do_fsync = _gf_true; + __shard_inode_ctx_get(lru_base_inode, this, + &lru_base_inode_ctx); + lru_base_inode_ctx->fsync_count--; + } + } + UNLOCK(&lru_inode->lock); + UNLOCK(&lru_base_inode->lock); + + after_fsync_check: + if (!do_fsync) { + shard_make_block_bname(lru_inode_ctx->block_num, + lru_inode_ctx->base_gfid, block_bname, + sizeof(block_bname)); + /* The following unref corresponds to the ref held at + * the time the shard was added to the lru list. + */ + inode_unref(lru_inode); + inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); + inode_forget(lru_inode, 0); + } else { + /* The following unref corresponds to the ref + * held when the shard was added to fsync list. + */ + inode_unref(lru_inode); + fsync_inode = lru_inode; + if (lru_base_inode) + inode_unref(lru_base_inode); + } + /* The following unref corresponds to the ref + * held by inode_find() above. + */ + inode_unref(lru_inode); + + /* The following unref corresponds to the ref held on the base shard + * at the time of adding shard inode to lru list + */ + if (lru_base_inode) + inode_unref(lru_base_inode); + + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. + */ + inode_ref(linked_inode); + if (base_inode) + gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + ctx->base_inode = inode_ref(base_inode); + list_add_tail(&ctx->ilist, &priv->ilist_head); + } } else { - local->call_count++; - shard_idx_iter++; + /* If this is not the first time this inode is being operated on, move + * it to the most recently used end of the list. + */ + list_move_tail(&ctx->ilist, &priv->ilist_head); } - } -out: - post_res_handler(frame, this); - return 0; + return fsync_inode; } -int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - dict_t *dict, dict_t *xdata) { - inode_t *inode = NULL; - shard_local_t *local = NULL; - - local = frame->local; - - if ((local->fd) && (local->fd->inode)) - inode = local->fd->inode; - else if (local->loc.inode) - inode = local->loc.inode; - - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" - " xattr failed on %s", - uuid_utoa(inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } - - if (shard_modify_size_and_block_count(&local->postbuf, dict)) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } -err: - local->post_update_size_handler(frame, this); - return 0; +int +shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, + int32_t op_ret, int32_t op_errno) +{ + switch (fop) { + case GF_FOP_LOOKUP: + SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_STAT: + SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSTAT: + SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_TRUNCATE: + SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_FTRUNCATE: + SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_MKNOD: + SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_LINK: + SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_CREATE: + SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + break; + case GF_FOP_UNLINK: + SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_RENAME: + SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, + NULL, NULL, NULL, NULL); + break; + case GF_FOP_WRITE: + SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_FALLOCATE: + SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_ZEROFILL: + SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_DISCARD: + SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_READ: + SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, + NULL, NULL); + break; + case GF_FOP_FSYNC: + SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_REMOVEXATTR: + SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FREMOVEXATTR: + SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FGETXATTR: + SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_GETXATTR: + SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSETXATTR: + SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETXATTR: + SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETATTR: + SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_FSETATTR: + SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, + NULL); + break; + case GF_FOP_SEEK: + SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); + break; + default: + gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; } -int shard_set_size_attrs(int64_t size, int64_t block_count, - int64_t **size_attr_p) { - int ret = -1; - int64_t *size_attr = NULL; +int +shard_common_inode_write_success_unwind(glusterfs_fop_t fop, + call_frame_t *frame, int32_t op_ret) +{ + shard_local_t *local = NULL; - if (!size_attr_p) - goto out; + local = frame->local; - size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); - if (!size_attr) - goto out; + switch (fop) { + case GF_FOP_WRITE: + SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + case GF_FOP_FALLOCATE: + SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + case GF_FOP_ZEROFILL: + SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + case GF_FOP_DISCARD: + SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + default: + gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; +} - size_attr[0] = hton64(size); - /* As sharding evolves, it _may_ be necessary to embed more pieces of - * information within the same xattr. So allocating slots for them in - * advance. For now, only bytes 0-63 and 128-191 which would make up the - * current size and block count respectively of the file are valid. - */ - size_attr[2] = hton64(block_count); +int +shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + char block_bname[256] = { + 0, + }; + fd_t *anon_fd = cookie; + inode_t *shard_inode = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_priv_t *priv = NULL; - *size_attr_p = size_attr; + priv = this->private; - ret = 0; -out: - return ret; -} + if (anon_fd == NULL || op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, + "fsync failed on shard"); + goto out; + } + shard_inode = anon_fd->inode; -int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, - loc_t *loc, - shard_post_update_size_fop_handler_t handler) { - int ret = -1; - int64_t *size_attr = NULL; - int64_t delta_blocks = 0; - inode_t *inode = NULL; - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; + LOCK(&priv->lock); + LOCK(&shard_inode->lock); + { + __shard_inode_ctx_get(shard_inode, this, &ctx); + if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { + shard_make_block_bname(ctx->block_num, shard_inode->gfid, + block_bname, sizeof(block_bname)); + inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); + /* The following unref corresponds to the ref held by + * inode_link() at the time the shard was created or + * looked up + */ + inode_unref(shard_inode); + inode_forget(shard_inode, 0); + } + } + UNLOCK(&shard_inode->lock); + UNLOCK(&priv->lock); - local = frame->local; - local->post_update_size_handler = handler; +out: + if (anon_fd) + fd_unref(anon_fd); + STACK_DESTROY(frame->root); + return 0; +} - xattr_req = dict_new(); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } - - if (fd) - inode = fd->inode; - else - inode = loc->inode; - - /* If both size and block count have not changed, then skip the xattrop. - */ - delta_blocks = GF_ATOMIC_GET(local->delta_blocks); - if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { - goto out; - } - - ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, - &size_attr); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, - "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } - - ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set key %s into dict. gfid=%s", GF_XATTR_SHARD_FILE_SIZE, - uuid_utoa(inode->gfid)); - GF_FREE(size_attr); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } +int +shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) +{ + fd_t *anon_fd = NULL; + call_frame_t *fsync_frame = NULL; + + fsync_frame = create_frame(this, this->ctx->pool); + if (!fsync_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to fsync shard"); + return -1; + } - if (fd) - STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, - xattr_req, NULL); - else - STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, - xattr_req, NULL); + anon_fd = fd_anonymous(inode); + if (!anon_fd) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create anon fd to" + " fsync shard"); + STACK_DESTROY(fsync_frame->root); + return -1; + } - dict_unref(xattr_req); - return 0; + STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + anon_fd, 1, NULL); + return 0; +} -out: - if (xattr_req) - dict_unref(xattr_req); - handler(frame, this); - return 0; -} - -static inode_t *shard_link_internal_dir_inode(shard_local_t *local, - inode_t *inode, struct iatt *buf, - shard_internal_dir_type_t type) { - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; - char *bname = NULL; - inode_t **priv_inode = NULL; - inode_t *parent = NULL; - - priv = THIS->private; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - bname = GF_SHARD_DIR; - priv_inode = &priv->dot_shard_inode; - parent = inode->table->root; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - bname = GF_SHARD_REMOVE_ME_DIR; - priv_inode = &priv->dot_shard_rm_inode; - parent = priv->dot_shard_inode; - break; - default: - break; - } - - linked_inode = inode_link(inode, parent, bname, buf); - inode_lookup(linked_inode); - *priv_inode = linked_inode; - return linked_inode; -} - -int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) { - shard_local_t *local = NULL; - inode_t *linked_inode = NULL; - shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - - local = frame->local; - - if (op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto out; - } - - /* To-Do: Fix refcount increment per call to - * shard_link_internal_dir_inode(). - */ - linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); - shard_inode_ctx_mark_dir_refreshed(linked_inode, this); -out: - shard_common_resolve_shards(frame, this, local->post_res_handler); - return 0; -} - -int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, - shard_internal_dir_type_t type) { - loc_t loc = { - 0, - }; - inode_t *inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - uuid_t gfid = { - 0, - }; - - local = frame->local; - priv = this->private; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - gf_uuid_copy(gfid, priv->dot_shard_gfid); - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); - break; - default: - break; - } - - inode = inode_find(this->itable, gfid); - - if (!shard_inode_ctx_needs_lookup(inode, this)) { - local->op_ret = 0; - goto out; - } +int +shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler) +{ + int i = -1; + uint32_t shard_idx_iter = 0; + char path[PATH_MAX] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *inode = NULL; + inode_t *res_inode = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; - /* Plain assignment because the ref is already taken above through - * call to inode_find() - */ - loc.inode = inode; - gf_uuid_copy(loc.gfid, gfid); + priv = this->private; + local = frame->local; + local->call_count = 0; + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; + if (res_inode) + gf_uuid_copy(gfid, res_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); - STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, - NULL); - loc_wipe(&loc); + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; - return 0; + while (shard_idx_iter <= local->last_block) { + i++; + if (shard_idx_iter == 0) { + local->inode_list[i] = inode_ref(res_inode); + shard_idx_iter++; + continue; + } + shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); + + inode = NULL; + inode = inode_resolve(this->itable, path); + if (inode) { + gf_msg_debug(this->name, 0, + "Shard %d already " + "present. gfid=%s. Saving inode for future.", + shard_idx_iter, uuid_utoa(inode->gfid)); + local->inode_list[i] = inode; + /* Let the ref on the inodes that are already present + * in inode table still be held so that they don't get + * forgotten by the time the fop reaches the actual + * write stage. + */ + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list( + inode, this, res_inode, shard_idx_iter, gfid); + } + UNLOCK(&priv->lock); + shard_idx_iter++; + if (fsync_inode) + shard_initiate_evicted_inode_fsync(this, fsync_inode); + continue; + } else { + local->call_count++; + shard_idx_iter++; + } + } out: - shard_common_resolve_shards(frame, this, local->post_res_handler); - return 0; + post_res_handler(frame, this); + return 0; } -int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) { - inode_t *link_inode = NULL; - shard_local_t *local = NULL; - shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +int +shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + inode_t *inode = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } - - if (!IA_ISDIR(buf->ia_type)) { - gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, - "%s already exists and " - "is not a directory. Please remove it from all bricks " - "and try again", - shard_internal_dir_string(type)); - local->op_ret = -1; - local->op_errno = EIO; - goto unwind; - } - - link_inode = shard_link_internal_dir_inode(local, inode, buf, type); - if (link_inode != inode) { - shard_refresh_internal_dir(frame, this, type); - } else { - shard_inode_ctx_mark_dir_refreshed(link_inode, this); - shard_common_resolve_shards(frame, this, local->post_res_handler); - } - return 0; + if ((local->fd) && (local->fd->inode)) + inode = local->fd->inode; + else if (local->loc.inode) + inode = local->loc.inode; -unwind: - local->post_res_handler(frame, this); - return 0; -} - -int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t post_res_handler, - shard_internal_dir_type_t type) { - int ret = -1; - dict_t *xattr_req = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - uuid_t *gfid = NULL; - loc_t *loc = NULL; - gf_boolean_t free_gfid = _gf_true; - - local = frame->local; - priv = this->private; - local->post_res_handler = post_res_handler; - - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); - if (!gfid) - goto err; - - xattr_req = dict_new(); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - gf_uuid_copy(*gfid, priv->dot_shard_gfid); - loc = &local->dot_shard_loc; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); - loc = &local->dot_shard_rm_loc; - break; - default: - bzero(*gfid, sizeof(uuid_t)); - break; - } - - ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set gfid of %s into dict", - shard_internal_dir_string(type)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } else { - free_gfid = _gf_false; - } - - STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, - xattr_req); - - dict_unref(xattr_req); - return 0; - -err: - if (xattr_req) - dict_unref(xattr_req); - if (free_gfid) - GF_FREE(gfid); - post_res_handler(frame, this); - return 0; -} - -static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, - dict_t *xdata, struct iatt *buf) { - int ret = 0; - uint64_t size = 0; - void *bsize = NULL; - - if (shard_inode_ctx_get_block_size(inode, this, &size)) { - /* Fresh lookup */ - ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); - if (!ret) - size = ntoh64(*((uint64_t *)bsize)); - /* If the file is sharded, set its block size, otherwise just - * set 0. - */ - - shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); - } - /* If the file is sharded, also set the remaining attributes, - * except for ia_size and ia_blocks. - */ - if (size) { - shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); - (void)shard_inode_ctx_invalidate(inode, this, buf); - } -} - -int shard_delete_shards(void *opaque); - -int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); - -int shard_start_background_deletion(xlator_t *this) { - int ret = 0; - gf_boolean_t i_cleanup = _gf_true; - shard_priv_t *priv = NULL; - call_frame_t *cleanup_frame = NULL; - - priv = this->private; - - LOCK(&priv->lock); - { - switch (priv->bg_del_state) { - case SHARD_BG_DELETION_NONE: - i_cleanup = _gf_true; - priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; - break; - case SHARD_BG_DELETION_LAUNCHING: - i_cleanup = _gf_false; - break; - case SHARD_BG_DELETION_IN_PROGRESS: - priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; - i_cleanup = _gf_false; - break; - default: - break; - } - } - UNLOCK(&priv->lock); - if (!i_cleanup) - return 0; - - cleanup_frame = create_frame(this, this->ctx->pool); - if (!cleanup_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create " - "new frame to delete shards"); - ret = -ENOMEM; - goto err; - } - - set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); - - ret = synctask_new(this->ctx->env, shard_delete_shards, - shard_delete_shards_cbk, cleanup_frame, cleanup_frame); - if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, - "failed to create task to do background " - "cleanup of shards"); - STACK_DESTROY(cleanup_frame->root); - goto err; - } - return 0; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_UPDATE_FILE_SIZE_FAILED, + "Update to file size" + " xattr failed on %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + if (shard_modify_size_and_block_count(&local->postbuf, dict)) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } err: - LOCK(&priv->lock); - { priv->bg_del_state = SHARD_BG_DELETION_NONE; } - UNLOCK(&priv->lock); - return ret; + local->post_update_size_handler(frame, this); + return 0; } -int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int ret = -1; - shard_priv_t *priv = NULL; - gf_boolean_t i_start_cleanup = _gf_false; - - priv = this->private; - - if (op_ret < 0) - goto unwind; - - if (IA_ISDIR(buf->ia_type)) - goto unwind; - - /* Also, if the file is sharded, get the file size and block cnt xattr, - * and store them in the stbuf appropriately. - */ - - if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && - frame->root->pid != GF_CLIENT_PID_GSYNCD) - shard_modify_size_and_block_count(buf, xdata); - - /* If this was a fresh lookup, there are two possibilities: - * 1) If the file is sharded (indicated by the presence of block size - * xattr), store this block size, along with rdev and mode in its - * inode ctx. - * 2) If the file is not sharded, store size along with rdev and mode - * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is - * already initialised to all zeroes, nothing more needs to be done. - */ +int +shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) +{ + int ret = -1; + int64_t *size_attr = NULL; - (void)shard_inode_ctx_update(inode, this, xdata, buf); + if (!size_attr_p) + goto out; - LOCK(&priv->lock); - { - if (priv->first_lookup_done == _gf_false) { - priv->first_lookup_done = _gf_true; - i_start_cleanup = _gf_true; - } - } - UNLOCK(&priv->lock); + size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); + if (!size_attr) + goto out; - if (!i_start_cleanup) - goto unwind; + size_attr[0] = hton64(size); + /* As sharding evolves, it _may_ be necessary to embed more pieces of + * information within the same xattr. So allocating slots for them in + * advance. For now, only bytes 0-63 and 128-191 which would make up the + * current size and block count respectively of the file are valid. + */ + size_attr[2] = hton64(block_count); - ret = shard_start_background_deletion(this); - if (ret < 0) { - LOCK(&priv->lock); - { priv->first_lookup_done = _gf_false; } - UNLOCK(&priv->lock); - } + *size_attr_p = size_attr; -unwind: - SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, - postparent); - return 0; + ret = 0; +out: + return ret; } -int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) { - int ret = -1; - int32_t op_errno = ENOMEM; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - this->itable = loc->inode->table; - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && - (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { - SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); - } +int +shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, + loc_t *loc, shard_post_update_size_fop_handler_t handler) +{ + int ret = -1; + int64_t *size_attr = NULL; + int64_t delta_blocks = 0; + inode_t *inode = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; - local = mem_get0(this->local_pool); - if (!local) - goto err; + local = frame->local; + local->post_update_size_handler = handler; - frame->local = local; + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } - loc_copy(&local->loc, loc); + if (fd) + inode = fd->inode; + else + inode = loc->inode; - local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); - if (!local->xattr_req) - goto err; + /* If both size and block count have not changed, then skip the xattrop. + */ + delta_blocks = GF_ATOMIC_GET(local->delta_blocks); + if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { + goto out; + } - if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + ret = shard_set_size_attrs(local->delta_size + local->hole_size, + delta_blocks, &size_attr); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict" - " value: key:%s for path %s", - GF_XATTR_SHARD_BLOCK_SIZE, loc->path); - goto err; + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, + "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; } - } - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict value: key:%s for path %s.", - GF_XATTR_SHARD_FILE_SIZE, loc->path); - goto err; + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict. gfid=%s", + GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); + GF_FREE(size_attr); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; } - } - if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) - dict_del(xattr_req, GF_CONTENT_KEY); + if (fd) + STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, + GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); + else + STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, + GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); - STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); - return 0; -err: - shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); - return 0; -} + dict_unref(xattr_req); + return 0; -int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xdata, - struct iatt *postparent) { - int ret = -1; - int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; - shard_inode_ctx_t ctx = { - 0, - }; - - local = frame->local; - - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" - " failed : %s", - loc_gfid_utoa(&(local->loc))); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } +out: + if (xattr_req) + dict_unref(xattr_req); + handler(frame, this); + return 0; +} - local->prebuf = *buf; - if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } - - if (shard_inode_ctx_get_all(inode, this, &ctx)) - mask = SHARD_ALL_MASK; - - ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, - (mask | SHARD_MASK_REFRESH_RESET)); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, - "Failed to set inode" - " write params into inode ctx for %s", - uuid_utoa(buf->ia_gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } +static inode_t * +shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, + struct iatt *buf, shard_internal_dir_type_t type) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + char *bname = NULL; + inode_t **priv_inode = NULL; + inode_t *parent = NULL; + + priv = THIS->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + bname = GF_SHARD_DIR; + priv_inode = &priv->dot_shard_inode; + parent = inode->table->root; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + bname = GF_SHARD_REMOVE_ME_DIR; + priv_inode = &priv->dot_shard_rm_inode; + parent = priv->dot_shard_inode; + break; + default: + break; + } -unwind: - local->handler(frame, this); - return 0; -} - -int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) { - int ret = -1; - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; - gf_boolean_t need_refresh = _gf_false; - - local = frame->local; - local->handler = handler; - - ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, - &need_refresh); - /* By this time, inode ctx should have been created either in create, - * mknod, readdirp or lookup. If not it is a bug! - */ - if ((ret == 0) && (need_refresh == _gf_false)) { - gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" - "Serving prebuf off the inode ctx cache", - uuid_utoa(loc->gfid)); - goto out; - } - - xattr_req = dict_new(); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } + linked_inode = inode_link(inode, parent, bname, buf); + inode_lookup(linked_inode); + *priv_inode = linked_inode; + return linked_inode; +} - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); +int +shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + shard_local_t *local = NULL; + inode_t *linked_inode = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + local = frame->local; - dict_unref(xattr_req); - return 0; + if (op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto out; + } + /* To-Do: Fix refcount increment per call to + * shard_link_internal_dir_inode(). + */ + linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); + shard_inode_ctx_mark_dir_refreshed(linked_inode, this); out: - if (xattr_req) - dict_unref(xattr_req); - handler(frame, this); - return 0; + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; } -int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; +int +shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, + shard_internal_dir_type_t type) +{ + loc_t loc = { + 0, + }; + inode_t *inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; - local = frame->local; + local = frame->local; + priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(gfid, priv->dot_shard_gfid); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); + break; + default: + break; + } - if (local->op_ret >= 0) - shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, - SHARD_LOOKUP_MASK); + inode = inode_find(this->itable, gfid); - SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, - &local->prebuf, local->xattr_rsp); - return 0; -} + if (!shard_inode_ctx_needs_lookup(inode, this)) { + local->op_ret = 0; + goto out; + } -int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + /* Plain assignment because the ref is already taken above through + * call to inode_find() + */ + loc.inode = inode; + gf_uuid_copy(loc.gfid, gfid); - local = frame->local; + STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, + NULL); + loc_wipe(&loc); - if (local->op_ret >= 0) - shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, - SHARD_LOOKUP_MASK); + return 0; - SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, - &local->prebuf, local->xattr_rsp); - return 0; +out: + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; } -int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) { - inode_t *inode = NULL; - shard_local_t *local = NULL; - - local = frame->local; +int +shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + inode_t *link_inode = NULL; + shard_local_t *local = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, - "stat failed: %s", local->fd ? uuid_utoa(local->fd->inode->gfid) - : uuid_utoa((local->loc.inode)->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + local = frame->local; - local->prebuf = *buf; - if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } - local->xattr_rsp = dict_ref(xdata); + if (op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } - if (local->loc.inode) - inode = local->loc.inode; - else - inode = local->fd->inode; + if (!IA_ISDIR(buf->ia_type)) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, + "%s already exists and " + "is not a directory. Please remove it from all bricks " + "and try again", + shard_internal_dir_string(type)); + local->op_ret = -1; + local->op_errno = EIO; + goto unwind; + } - shard_inode_ctx_invalidate(inode, this, &local->prebuf); + link_inode = shard_link_internal_dir_inode(local, inode, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { + shard_inode_ctx_mark_dir_refreshed(link_inode, this); + shard_common_resolve_shards(frame, this, local->post_res_handler); + } + return 0; unwind: - local->handler(frame, this); - return 0; + local->post_res_handler(frame, this); + return 0; } -int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int +shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler, + shard_internal_dir_type_t type) +{ + int ret = -1; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uuid_t *gfid = NULL; + loc_t *loc = NULL; + gf_boolean_t free_gfid = _gf_true; - if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { - STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; - } + local = frame->local; + priv = this->private; + local->post_res_handler = post_res_handler; - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid) + goto err; - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; - } + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } - local = mem_get0(this->local_pool); - if (!local) - goto err; + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; + default: + bzero(*gfid, sizeof(uuid_t)); + break; + } - frame->local = local; + ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid of %s into dict", + shard_internal_dir_string(type)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } else { + free_gfid = _gf_false; + } - local->handler = shard_post_stat_handler; - loc_copy(&local->loc, loc); - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, + xattr_req); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, - local, err); + dict_unref(xattr_req); + return 0; - STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); - return 0; err: - shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); - return 0; + if (xattr_req) + dict_unref(xattr_req); + if (free_gfid) + GF_FREE(gfid); + post_res_handler(frame, this); + return 0; } -int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; +static void +shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, + struct iatt *buf) +{ + int ret = 0; + uint64_t size = 0; + void *bsize = NULL; + + if (shard_inode_ctx_get_block_size(inode, this, &size)) { + /* Fresh lookup */ + ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); + if (!ret) + size = ntoh64(*((uint64_t *)bsize)); + /* If the file is sharded, set its block size, otherwise just + * set 0. + */ + + shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); + } + /* If the file is sharded, also set the remaining attributes, + * except for ia_size and ia_blocks. + */ + if (size) { + shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); + (void)shard_inode_ctx_invalidate(inode, this, buf); + } +} - if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { - STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; - } +int +shard_delete_shards(void *opaque); - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(fd->inode->gfid)); - goto err; - } +int +shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; - } +int +shard_start_background_deletion(xlator_t *this) +{ + int ret = 0; + gf_boolean_t i_cleanup = _gf_true; + shard_priv_t *priv = NULL; + call_frame_t *cleanup_frame = NULL; - if (!this->itable) - this->itable = fd->inode->table; + priv = this->private; - local = mem_get0(this->local_pool); - if (!local) - goto err; + LOCK(&priv->lock); + { + switch (priv->bg_del_state) { + case SHARD_BG_DELETION_NONE: + i_cleanup = _gf_true; + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + break; + case SHARD_BG_DELETION_LAUNCHING: + i_cleanup = _gf_false; + break; + case SHARD_BG_DELETION_IN_PROGRESS: + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + i_cleanup = _gf_false; + break; + default: + break; + } + } + UNLOCK(&priv->lock); + if (!i_cleanup) + return 0; - frame->local = local; + cleanup_frame = create_frame(this, this->ctx->pool); + if (!cleanup_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create " + "new frame to delete shards"); + ret = -ENOMEM; + goto err; + } - local->handler = shard_post_fstat_handler; - local->fd = fd_ref(fd); - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, - local, err); + ret = synctask_new(this->ctx->env, shard_delete_shards, + shard_delete_shards_cbk, cleanup_frame, cleanup_frame); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, + SHARD_MSG_SHARDS_DELETION_FAILED, + "failed to create task to do background " + "cleanup of shards"); + STACK_DESTROY(cleanup_frame->root); + goto err; + } + return 0; - STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); - return 0; err: - shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); - return 0; + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); + return ret; } -int shard_post_update_size_truncate_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; +int +shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) +{ + int ret = -1; + shard_priv_t *priv = NULL; + gf_boolean_t i_start_cleanup = _gf_false; - local = frame->local; + priv = this->private; - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, NULL); - else - SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, NULL); - return 0; -} + if (op_ret < 0) + goto unwind; -int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) { - inode_t *inode = NULL; - int64_t delta_blocks = 0; - shard_local_t *local = NULL; + if (IA_ISDIR(buf->ia_type)) + goto unwind; - local = frame->local; + /* Also, if the file is sharded, get the file size and block cnt xattr, + * and store them in the stbuf appropriately. + */ - SHARD_UNSET_ROOT_FS_ID(frame, local); + if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && + frame->root->pid != GF_CLIENT_PID_GSYNCD) + shard_modify_size_and_block_count(buf, xdata); + + /* If this was a fresh lookup, there are two possibilities: + * 1) If the file is sharded (indicated by the presence of block size + * xattr), store this block size, along with rdev and mode in its + * inode ctx. + * 2) If the file is not sharded, store size along with rdev and mode + * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is + * already initialised to all zeroes, nothing more needs to be done. + */ - inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" - " shard failed : %s", - uuid_utoa(inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } - - local->postbuf.ia_size = local->offset; - /* Let the delta be negative. We want xattrop to do subtraction */ - local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, - postbuf->ia_blocks - prebuf->ia_blocks); - GF_ASSERT(delta_blocks <= 0); - local->postbuf.ia_blocks += delta_blocks; - local->hole_size = 0; - - shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); - shard_update_file_size(frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; -} - -int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, - inode_t *inode) { - size_t last_shard_size_after = 0; - loc_t loc = { - 0, - }; - shard_local_t *local = NULL; - - local = frame->local; - - /* A NULL inode could be due to the fact that the last shard which - * needs to be truncated does not exist due to it lying in a hole - * region. So the only thing left to do in that case would be an - * update to file size xattr. - */ - if (!inode) { - gf_msg_debug(this->name, 0, - "Last shard to be truncated absent in backend:%" PRIu64 - " of gfid: %s. Directly proceeding to update file size", - local->first_block, uuid_utoa(local->loc.inode->gfid)); - shard_update_file_size(frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } + (void)shard_inode_ctx_update(inode, this, xdata, buf); - SHARD_SET_ROOT_FS_ID(frame, local); + LOCK(&priv->lock); + { + if (priv->first_lookup_done == _gf_false) { + priv->first_lookup_done = _gf_true; + i_start_cleanup = _gf_true; + } + } + UNLOCK(&priv->lock); - loc.inode = inode_ref(inode); - gf_uuid_copy(loc.gfid, inode->gfid); + if (!i_start_cleanup) + goto unwind; - last_shard_size_after = (local->offset % local->block_size); + ret = shard_start_background_deletion(this); + if (ret < 0) { + LOCK(&priv->lock); + { + priv->first_lookup_done = _gf_false; + } + UNLOCK(&priv->lock); + } - STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, - NULL); - loc_wipe(&loc); - return 0; +unwind: + SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; } -void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); +int +shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +{ + int ret = -1; + int32_t op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; -int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) { - int ret = 0; - int call_count = 0; - int shard_block_num = (long)cookie; - uint64_t block_count = 0; - shard_local_t *local = NULL; + this->itable = loc->inode->table; + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && + (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) { + SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); + } - local = frame->local; + local = mem_get0(this->local_pool); + if (!local) + goto err; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; - } - ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); - if (!ret) { - GF_ATOMIC_SUB(local->delta_blocks, block_count); - } else { - /* dict_get failed possibly due to a heterogeneous cluster? */ - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to get key %s from dict during truncate of gfid %s", - GF_GET_FILE_BLOCK_COUNT, - uuid_utoa(local->resolver_base_inode->gfid)); - } - - shard_unlink_block_inode(local, shard_block_num); -done: - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - shard_truncate_last_shard(frame, this, local->inode_list[0]); - } - return 0; -} - -int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { - int i = 1; - int ret = -1; - int call_count = 0; - uint32_t cur_block = 0; - uint32_t last_block = 0; - char path[PATH_MAX] = { - 0, - }; - char *bname = NULL; - loc_t loc = { - 0, - }; - gf_boolean_t wind_failed = _gf_false; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - dict_t *xdata_req = NULL; - - local = frame->local; - priv = this->private; - - cur_block = local->first_block + 1; - last_block = local->last_block; - - /* Determine call count */ - for (i = 1; i < local->num_blocks; i++) { - if (!local->inode_list[i]) - continue; - call_count++; - } - - if (!call_count) { - /* Call count = 0 implies that all of the shards that need to be - * unlinked do not exist. So shard xlator would now proceed to - * do the final truncate + size updates. - */ - gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " - "truncate absent in backend: %s. Directly " - "proceeding to update file size", - uuid_utoa(inode->gfid)); - local->postbuf.ia_size = local->offset; - local->postbuf.ia_blocks = local->prebuf.ia_blocks; - local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - GF_ATOMIC_INIT(local->delta_blocks, 0); - local->hole_size = 0; - shard_update_file_size(frame, this, local->fd, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } + frame->local = local; - local->call_count = call_count; - i = 1; - xdata_req = dict_new(); - if (!xdata_req) { - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; - } - ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set key %s into dict during truncate of %s", - GF_GET_FILE_BLOCK_COUNT, - uuid_utoa(local->resolver_base_inode->gfid)); - dict_unref(xdata_req); - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; - } + loc_copy(&local->loc, loc); - SHARD_SET_ROOT_FS_ID(frame, local); - while (cur_block <= last_block) { - if (!local->inode_list[i]) { - cur_block++; - i++; - continue; - } - if (wind_failed) { - shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, - NULL, NULL, NULL); - goto next; - } + local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); + if (!local->xattr_req) + goto err; - shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); - bname = strrchr(path, '/') + 1; - loc.parent = inode_ref(priv->dot_shard_inode); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on %s. Base file gfid = %s", - bname, uuid_utoa(inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe(&loc); - wind_failed = _gf_true; - shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, - NULL, NULL, NULL); - goto next; + if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict" + " value: key:%s for path %s", + GF_XATTR_SHARD_BLOCK_SIZE, loc->path); + goto err; + } } - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - loc.inode = inode_ref(local->inode_list[i]); - STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, - 0, xdata_req); - loc_wipe(&loc); - next: - i++; - cur_block++; - if (!--call_count) - break; - } - dict_unref(xdata_req); - return 0; -} - -int shard_truncate_do(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, + 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s for path %s.", + GF_XATTR_SHARD_FILE_SIZE, loc->path); + goto err; + } + } - local = frame->local; + if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) + dict_del(xattr_req, GF_CONTENT_KEY); - if (local->num_blocks == 1) { - /* This means that there are no shards to be unlinked. - * The fop boils down to truncating the last shard, updating - * the size and unwinding. - */ - shard_truncate_last_shard(frame, this, local->inode_list[0]); + STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); + return 0; +err: + shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); return 0; - } else { - shard_truncate_htol(frame, this, local->loc.inode); - } - return 0; } -int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; +int +shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + int ret = -1; + int32_t mask = SHARD_INODE_WRITE_MASK; + shard_local_t *local = NULL; + shard_inode_ctx_t ctx = { + 0, + }; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } - - shard_truncate_do(frame, this); - return 0; -} - -void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, - struct iatt *buf) { - int list_index = 0; - char block_bname[256] = { - 0, - }; - uuid_t gfid = { - 0, - }; - inode_t *linked_inode = NULL; - xlator_t *this = NULL; - inode_t *fsync_inode = NULL; - shard_priv_t *priv = NULL; - inode_t *base_inode = NULL; - - this = THIS; - priv = this->private; - if (local->loc.inode) { - gf_uuid_copy(gfid, local->loc.inode->gfid); - base_inode = local->loc.inode; - } else if (local->resolver_base_inode) { - gf_uuid_copy(gfid, local->resolver_base_inode->gfid); - base_inode = local->resolver_base_inode; - } else { - gf_uuid_copy(gfid, local->base_gfid); - } - - shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); - - shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); - linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); - inode_lookup(linked_inode); - list_index = block_num - local->first_block; - local->inode_list[list_index] = linked_inode; - - LOCK(&priv->lock); - { - fsync_inode = __shard_update_shards_inode_list(linked_inode, this, - base_inode, block_num, gfid); - } - UNLOCK(&priv->lock); - if (fsync_inode) - shard_initiate_evicted_inode_fsync(this, fsync_inode); -} - -int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) { - int call_count = 0; - int shard_block_num = (long)cookie; - uuid_t gfid = { - 0, - }; - shard_local_t *local = NULL; - - local = frame->local; - if (local->resolver_base_inode) - gf_uuid_copy(gfid, local->resolver_base_inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); - - if (op_ret < 0) { - /* Ignore absence of shards in the backend in truncate fop. */ - switch (local->fop) { - case GF_FOP_TRUNCATE: - case GF_FOP_FTRUNCATE: - case GF_FOP_RENAME: - case GF_FOP_UNLINK: - if (op_errno == ENOENT) - goto done; - break; - case GF_FOP_WRITE: - case GF_FOP_READ: - case GF_FOP_ZEROFILL: - case GF_FOP_DISCARD: - case GF_FOP_FALLOCATE: - if ((!local->first_lookup_done) && (op_errno == ENOENT)) { - LOCK(&frame->lock); - { local->create_count++; } - UNLOCK(&frame->lock); - goto done; - } - break; - default: - break; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_BASE_FILE_LOOKUP_FAILED, + "Lookup on base file" + " failed : %s", + loc_gfid_utoa(&(local->loc))); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; } - /* else */ - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, - "Lookup on shard %d " - "failed. Base file gfid = %s", - shard_block_num, uuid_utoa(gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; - } - - shard_link_block_inode(local, shard_block_num, inode, buf); - -done: - if (local->lookup_shards_barriered) { - syncbarrier_wake(&local->barrier); - return 0; - } else { - call_count = shard_call_count_return(frame); - if (call_count == 0) { - if (!local->first_lookup_done) - local->first_lookup_done = _gf_true; - local->pls_fop_handler(frame, this); + local->prebuf = *buf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; } - } - return 0; -} -dict_t *shard_create_gfid_dict(dict_t *dict) { - int ret = 0; - dict_t *new = NULL; - unsigned char *gfid = NULL; + if (shard_inode_ctx_get_all(inode, this, &ctx)) + mask = SHARD_ALL_MASK; - new = dict_copy_with_ref(dict, NULL); - if (!new) - return NULL; + ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, + (mask | SHARD_MASK_REFRESH_RESET)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, + "Failed to set inode" + " write params into inode ctx for %s", + uuid_utoa(buf->ia_gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); - if (!gfid) { - ret = -1; - goto out; - } +unwind: + local->handler(frame, this); + return 0; +} - gf_uuid_generate(gfid); +int +shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + shard_post_fop_handler_t handler) +{ + int ret = -1; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + gf_boolean_t need_refresh = _gf_false; - ret = dict_set_gfuuid(new, "gfid-req", gfid, false); + local = frame->local; + local->handler = handler; -out: - if (ret) { - dict_unref(new); - new = NULL; - GF_FREE(gfid); - } - - return new; -} - -int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, - inode_t *inode, - shard_post_lookup_shards_fop_handler_t handler) { - int i = 0; - int ret = 0; - int count = 0; - int call_count = 0; - int32_t shard_idx_iter = 0; - int last_block = 0; - char path[PATH_MAX] = { - 0, - }; - char *bname = NULL; - uuid_t gfid = { - 0, - }; - loc_t loc = { - 0, - }; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - gf_boolean_t wind_failed = _gf_false; - dict_t *xattr_req = NULL; - - priv = this->private; - local = frame->local; - count = call_count = local->call_count; - shard_idx_iter = local->first_block; - last_block = local->last_block; - local->pls_fop_handler = handler; - if (local->lookup_shards_barriered) - local->barrier.waitfor = local->call_count; - - if (inode) - gf_uuid_copy(gfid, inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); - - while (shard_idx_iter <= last_block) { - if (local->inode_list[i]) { - i++; - shard_idx_iter++; - continue; - } - - if (wind_failed) { - shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, - -1, ENOMEM, NULL, NULL, NULL, NULL); - goto next; - } - - shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); - - bname = strrchr(path, '/') + 1; - loc.inode = inode_new(this->itable); - loc.parent = inode_ref(priv->dot_shard_inode); - gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0 || !(loc.inode)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on %s, base file gfid = %s", - bname, uuid_utoa(gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe(&loc); - wind_failed = _gf_true; - shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, - -1, ENOMEM, NULL, NULL, NULL, NULL); - goto next; + ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, + &need_refresh); + /* By this time, inode ctx should have been created either in create, + * mknod, readdirp or lookup. If not it is a bug! + */ + if ((ret == 0) && (need_refresh == _gf_false)) { + gf_msg_debug(this->name, 0, + "Skipping lookup on base file: %s" + "Serving prebuf off the inode ctx cache", + uuid_utoa(loc->gfid)); + goto out; } - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - - xattr_req = shard_create_gfid_dict(local->xattr_req); + xattr_req = dict_new(); if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - loc_wipe(&loc); - shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, - -1, ENOMEM, NULL, NULL, NULL, NULL); - goto next; - } - - STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, - (void *)(long)shard_idx_iter, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); - loc_wipe(&loc); - dict_unref(xattr_req); - next: - shard_idx_iter++; - i++; - - if (!--call_count) - break; - } - if (local->lookup_shards_barriered) { - syncbarrier_wait(&local->barrier, count); - local->pls_fop_handler(frame, this); - } - return 0; -} - -int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - if (local->op_errno == ENOENT) { - /* If lookup on /.shard fails with ENOENT, it means that - * the file was 0-byte in size but truncated sometime in - * the past to a higher size which is reflected in the - * size xattr, and now being truncated to a lower size. - * In this case, the only thing that needs to be done is - * to update the size xattr of the file and unwind. - */ - local->first_block = local->last_block = 0; - local->num_blocks = 1; - local->call_count = 0; - local->op_ret = 0; - local->postbuf.ia_size = local->offset; - shard_update_file_size(frame, this, local->fd, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } else { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; } - } - if (!local->call_count) - shard_truncate_do(frame, this); - else - shard_common_lookup_shards(frame, this, local->loc.inode, - shard_post_lookup_shards_truncate_handler); - - return 0; -} - -int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { - int ret = 0; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - - /* First participant block here is the lowest numbered block that would - * hold the last byte of the file post successful truncation. - * Last participant block is the block that contains the last byte in - * the current state of the file. - * If (first block == last_block): - * then that means that the file only needs truncation of the - * first (or last since both are same) block. - * Else - * if (new_size % block_size == 0) - * then that means there is no truncate to be done with - * only shards from first_block + 1 through the last - * block needing to be unlinked. - * else - * both truncate of the first block and unlink of the - * remaining shards until end of file is required. - */ - local->first_block = - (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, - local->block_size); - local->last_block = - get_highest_block(0, local->prebuf.ia_size, local->block_size); - - local->num_blocks = local->last_block - local->first_block + 1; - GF_ASSERT(local->num_blocks > 0); - local->resolver_base_inode = - (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; - - if ((local->first_block == 0) && (local->num_blocks == 1)) { - if (local->fop == GF_FOP_TRUNCATE) - STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, - local->xattr_req); - else - STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, - local->xattr_req); - return 0; - } + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); - local->inode_list = - GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); - if (!local->inode_list) - goto err; + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = - shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret) - goto err; - shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_post_resolve_truncate_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } - return 0; + dict_unref(xattr_req); + return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; +out: + if (xattr_req) + dict_unref(xattr_req); + handler(frame, this); + return 0; } -int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - struct iatt tmp_stbuf = { - 0, - }; - - local = frame->local; +int +shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } + local = frame->local; - local->postbuf = tmp_stbuf = local->prebuf; + if (local->op_ret >= 0) + shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, + SHARD_LOOKUP_MASK); - if (local->prebuf.ia_size == local->offset) { - /* If the file size is same as requested size, unwind the call - * immediately. - */ - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, - NULL); - else - SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, - &local->postbuf, NULL); - } else if (local->offset > local->prebuf.ia_size) { - /* If the truncate is from a lower to a higher size, set the - * new size xattr and unwind. - */ - local->hole_size = local->offset - local->prebuf.ia_size; - local->delta_size = 0; - GF_ATOMIC_INIT(local->delta_blocks, 0); - local->postbuf.ia_size = local->offset; - tmp_stbuf.ia_size = local->offset; - shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, - SHARD_INODE_WRITE_MASK); - shard_update_file_size(frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - } else { - /* ... else - * i. unlink all shards that need to be unlinked. - * ii. truncate the last of the shards. - * iii. update the new size using setxattr. - * and unwind the fop. - */ - local->hole_size = 0; - local->delta_size = (local->offset - local->prebuf.ia_size); - GF_ATOMIC_INIT(local->delta_blocks, 0); - tmp_stbuf.ia_size = local->offset; - shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, - SHARD_INODE_WRITE_MASK); - shard_truncate_begin(frame, this); - } - return 0; + SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, + &local->prebuf, local->xattr_rsp); + return 0; } -/* TO-DO: - * Fix updates to size and block count with racing write(s) and truncate(s). - */ +int +shard_post_stat_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; -int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; + local = frame->local; - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } + if (local->op_ret >= 0) + shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, + SHARD_LOOKUP_MASK); - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, + &local->prebuf, local->xattr_rsp); return 0; - } - - if (!this->itable) - this->itable = loc->inode->table; +} - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - - ret = syncbarrier_init(&local->barrier); - if (ret) - goto err; - loc_copy(&local->loc, loc); - local->offset = offset; - local->block_size = block_size; - local->fop = GF_FOP_TRUNCATE; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->resolver_base_inode = loc->inode; - GF_ATOMIC_INIT(local->delta_blocks, 0); - - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); - return 0; +int +shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + inode_t *inode = NULL; + shard_local_t *local = NULL; -err: - shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); - return 0; -} - -int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(fd->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; - } - - if (!this->itable) - this->itable = fd->inode->table; - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - ret = syncbarrier_init(&local->barrier); - if (ret) - goto err; - local->fd = fd_ref(fd); - local->offset = offset; - local->block_size = block_size; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_FTRUNCATE; - - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - local->resolver_base_inode = fd->inode; - GF_ATOMIC_INIT(local->delta_blocks, 0); - - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); - return 0; -err: - shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); - return 0; -} + local = frame->local; -int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) { - int ret = -1; - shard_local_t *local = NULL; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, + "stat failed: %s", + local->fd ? uuid_utoa(local->fd->inode->gfid) + : uuid_utoa((local->loc.inode)->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } - local = frame->local; + local->prebuf = *buf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + local->xattr_rsp = dict_ref(xdata); - if (op_ret == -1) - goto unwind; + if (local->loc.inode) + inode = local->loc.inode; + else + inode = local->fd->inode; - ret = - shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, - "Failed to set inode " - "ctx for %s", - uuid_utoa(inode->gfid)); + shard_inode_ctx_invalidate(inode, this, &local->prebuf); unwind: - SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, - postparent, xdata); - - return 0; + local->handler(frame, this); + return 0; } -int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) { - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; +int +shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { + STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + } - priv = this->private; - local = mem_get0(this->local_pool); - if (!local) - goto err; + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - frame->local = local; - local->block_size = priv->block_size; - if (!__is_gsyncd_on_shard_dir(frame, loc)) { - SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); - } + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + } - STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); - return 0; -err: - shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); - return 0; -} + local = mem_get0(this->local_pool); + if (!local) + goto err; -int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) { - shard_local_t *local = NULL; + frame->local = local; - local = frame->local; - if (op_ret < 0) - goto err; + local->handler = shard_post_stat_handler; + loc_copy(&local->loc, loc); + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); - buf->ia_size = local->prebuf.ia_size; - buf->ia_blocks = local->prebuf.ia_blocks; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, + local, err); - SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, - postparent, xdata); - return 0; + STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); + return 0; err: - shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); + return 0; } -int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, - NULL, NULL, NULL); - return 0; - } +int +shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, - local->xattr_req); - return 0; -} + if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } -int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } - ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(oldloc->inode->gfid)); - goto err; - } + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } - if (!block_size) { - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, - oldloc, newloc, xdata); - return 0; - } + if (!this->itable) + this->itable = fd->inode->table; - if (!this->itable) - this->itable = oldloc->inode->table; + local = mem_get0(this->local_pool); + if (!local) + goto err; - local = mem_get0(this->local_pool); - if (!local) - goto err; + frame->local = local; - frame->local = local; + local->handler = shard_post_fstat_handler; + local->fd = fd_ref(fd); + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_link_handler); - return 0; + STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); + return 0; err: - shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); + return 0; } -int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); - -int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; - uuid_t gfid = { - 0, - }; +int +shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->resolver_base_inode) - gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + if (local->fop == GF_FOP_TRUNCATE) + SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, NULL); else - gf_uuid_copy(gfid, local->base_gfid); - - if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { - gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, - "failed to delete shards of %s", uuid_utoa(gfid)); + SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, NULL); return 0; - } - local->op_ret = 0; - local->op_errno = 0; - - shard_unlink_shards_do(frame, this, local->resolver_base_inode); - return 0; } -int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; - local->lookup_shards_barriered = _gf_true; - - if (!local->call_count) - shard_unlink_shards_do(frame, this, local->resolver_base_inode); - else - shard_common_lookup_shards(frame, this, local->resolver_base_inode, - shard_post_lookup_shards_unlink_handler); - return 0; -} - -void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { - char block_bname[256] = { - 0, - }; - uuid_t gfid = { - 0, - }; - inode_t *inode = NULL; - inode_t *base_inode = NULL; - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *base_ictx = NULL; - int unref_base_inode = 0; - int unref_shard_inode = 0; - - this = THIS; - priv = this->private; - - inode = local->inode_list[shard_block_num - local->first_block]; - shard_inode_ctx_get(inode, this, &ctx); - base_inode = ctx->base_inode; - if (base_inode) - gf_uuid_copy(gfid, base_inode->gfid); - else - gf_uuid_copy(gfid, ctx->base_gfid); - shard_make_block_bname(shard_block_num, gfid, block_bname, - sizeof(block_bname)); - - LOCK(&priv->lock); - if (base_inode) - LOCK(&base_inode->lock); - LOCK(&inode->lock); - { - __shard_inode_ctx_get(inode, this, &ctx); - if (!list_empty(&ctx->ilist)) { - list_del_init(&ctx->ilist); - priv->inode_count--; - unref_base_inode++; - unref_shard_inode++; - GF_ASSERT(priv->inode_count >= 0); - } - if (ctx->fsync_needed) { - unref_base_inode++; - unref_shard_inode++; - list_del_init(&ctx->to_fsync_list); - if (base_inode) { - __shard_inode_ctx_get(base_inode, this, &base_ictx); - base_ictx->fsync_count--; - } - } - } - UNLOCK(&inode->lock); - if (base_inode) - UNLOCK(&base_inode->lock); +int +shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + inode_t *inode = NULL; + int64_t delta_blocks = 0; + shard_local_t *local = NULL; - inode_unlink(inode, priv->dot_shard_inode, block_bname); - inode_ref_reduce_by_n(inode, unref_shard_inode); - inode_forget(inode, 0); + local = frame->local; - if (base_inode && unref_base_inode) - inode_ref_reduce_by_n(base_inode, unref_base_inode); - UNLOCK(&priv->lock); -} + SHARD_UNSET_ROOT_FS_ID(frame, local); -int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode + : local->fd->inode; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, + "truncate on last" + " shard failed : %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } - local = frame->local; + local->postbuf.ia_size = local->offset; + /* Let the delta be negative. We want xattrop to do subtraction */ + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; + delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, + postbuf->ia_blocks - prebuf->ia_blocks); + GF_ASSERT(delta_blocks <= 0); + local->postbuf.ia_blocks += delta_blocks; + local->hole_size = 0; - SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->preoldparent, - &local->postoldparent, &local->prenewparent, - &local->postnewparent, local->xattr_rsp); - return 0; + shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } -int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = frame->local; +int +shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) +{ + size_t last_shard_size_after = 0; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; - SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preoldparent, &local->postoldparent, - local->xattr_rsp); - return 0; -} + local = frame->local; -int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) { - int shard_block_num = (long)cookie; - shard_local_t *local = NULL; + /* A NULL inode could be due to the fact that the last shard which + * needs to be truncated does not exist due to it lying in a hole + * region. So the only thing left to do in that case would be an + * update to file size xattr. + */ + if (!inode) { + gf_msg_debug(this->name, 0, + "Last shard to be truncated absent in backend:%" PRIu64 + " of gfid: %s. Directly proceeding to update file size", + local->first_block, uuid_utoa(local->loc.inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } - local = frame->local; + SHARD_SET_ROOT_FS_ID(frame, local); - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; - } + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - shard_unlink_block_inode(local, shard_block_num); -done: - syncbarrier_wake(&local->barrier); - return 0; -} - -int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, - inode_t *inode) { - int i = 0; - int ret = -1; - int count = 0; - uint32_t cur_block = 0; - uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ - char *bname = NULL; - char path[PATH_MAX] = { - 0, - }; - uuid_t gfid = { - 0, - }; - loc_t loc = { - 0, - }; - gf_boolean_t wind_failed = _gf_false; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - - if (inode) - gf_uuid_copy(gfid, inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); - - for (i = 0; i < local->num_blocks; i++) { - if (!local->inode_list[i]) - continue; - count++; - } - - if (!count) { - /* callcount = 0 implies that all of the shards that need to be - * unlinked are non-existent (in other words the file is full of - * holes). - */ - gf_msg_debug(this->name, 0, "All shards that need to be " - "unlinked are non-existent: %s", - uuid_utoa(gfid)); + last_shard_size_after = (local->offset % local->block_size); + + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, + NULL); + loc_wipe(&loc); return 0; - } +} - SHARD_SET_ROOT_FS_ID(frame, local); - local->barrier.waitfor = count; - cur_block = cur_block_idx + local->first_block; +void +shard_unlink_block_inode(shard_local_t *local, int shard_block_num); - while (cur_block_idx < local->num_blocks) { - if (!local->inode_list[cur_block_idx]) - goto next; +int +shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int ret = 0; + int call_count = 0; + int shard_block_num = (long)cookie; + uint64_t block_count = 0; + shard_local_t *local = NULL; + + local = frame->local; - if (wind_failed) { - shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } + ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); + if (!ret) { + GF_ATOMIC_SUB(local->delta_blocks, block_count); + } else { + /* dict_get failed possibly due to a heterogeneous cluster? */ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get key %s from dict during truncate of gfid %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); } - shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); - bname = strrchr(path, '/') + 1; - loc.parent = inode_ref(priv->dot_shard_inode); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on %s, base file gfid = %s", - bname, uuid_utoa(gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe(&loc); - wind_failed = _gf_true; - shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; + shard_unlink_block_inode(local, shard_block_num); +done: + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + shard_truncate_last_shard(frame, this, local->inode_list[0]); } + return 0; +} - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - loc.inode = inode_ref(local->inode_list[cur_block_idx]); +int +shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) +{ + int i = 1; + int ret = -1; + int call_count = 0; + uint32_t cur_block = 0; + uint32_t last_block = 0; + char path[PATH_MAX] = { + 0, + }; + char *bname = NULL; + loc_t loc = { + 0, + }; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xdata_req = NULL; - STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, - (void *)(long)cur_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, - local->xattr_req); - loc_wipe(&loc); - next: - cur_block++; - cur_block_idx++; - } - syncbarrier_wait(&local->barrier, count); - SHARD_UNSET_ROOT_FS_ID(frame, local); - return 0; -} - -int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, - int now, int first_block, - gf_dirent_t *entry) { - int i = 0; - int ret = 0; - shard_local_t *local = NULL; - uuid_t gfid = { - 0, - }; - - local = cleanup_frame->local; - - local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list); - if (!local->inode_list) - return -ENOMEM; - - local->first_block = first_block; - local->last_block = first_block + now - 1; - local->num_blocks = now; - gf_uuid_parse(entry->d_name, gfid); - gf_uuid_copy(local->base_gfid, gfid); - local->resolver_base_inode = inode_find(this->itable, gfid); - local->call_count = 0; - ret = syncbarrier_init(&local->barrier); - if (ret) { - GF_FREE(local->inode_list); - local->inode_list = NULL; - inode_unref(local->resolver_base_inode); - local->resolver_base_inode = NULL; - return -errno; - } - shard_common_resolve_shards(cleanup_frame, this, - shard_post_resolve_unlink_handler); - - for (i = 0; i < local->num_blocks; i++) { - if (local->inode_list[i]) - inode_unref(local->inode_list[i]); - } - GF_FREE(local->inode_list); - local->inode_list = NULL; - if (local->op_ret) - ret = -local->op_errno; - syncbarrier_destroy(&local->barrier); - inode_unref(local->resolver_base_inode); - local->resolver_base_inode = NULL; - STACK_RESET(cleanup_frame->root); - return ret; -} - -int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, - gf_dirent_t *entry, inode_t *inode) { - int ret = 0; - int shard_count = 0; - int first_block = 0; - int now = 0; - uint64_t size = 0; - uint64_t block_size = 0; - uint64_t size_array[4] = { - 0, - }; - void *bsize = NULL; - void *size_attr = NULL; - dict_t *xattr_rsp = NULL; - loc_t loc = { - 0, - }; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = cleanup_frame->local; - ret = dict_reset(local->xattr_req); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to reset dict"); - ret = -ENOMEM; - goto err; - } - - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); - ret = -ENOMEM; - goto err; - } - - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); - ret = -ENOMEM; - goto err; - } - - loc.inode = inode_ref(inode); - loc.parent = inode_ref(priv->dot_shard_rm_inode); - ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", entry->d_name); - ret = -ENOMEM; - goto err; - } - - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, - &xattr_rsp); - if (ret) - goto err; - - ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); - goto err; - } - block_size = ntoh64(*((uint64_t *)bsize)); - - ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); - goto err; - } - - memcpy(size_array, size_attr, sizeof(size_array)); - size = ntoh64(size_array[0]); - - shard_count = (size / block_size) - 1; - if (shard_count < 0) { - gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " - "its shard-block-size. Nothing to delete. " - "Returning", - entry->d_name); - /* File size < shard-block-size, so nothing to delete */ - ret = 0; - goto delete_marker; - } - if ((size % block_size) > 0) - shard_count++; - - if (shard_count == 0) { - gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " - "its shard-block-size. Nothing to delete. " - "Returning", - entry->d_name); - ret = 0; - goto delete_marker; - } - gf_msg_debug(this->name, 0, - "base file = %s, " - "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " - "shard_count=%d", - entry->d_name, block_size, size, shard_count); - - /* Perform a gfid-based lookup to see if gfid corresponding to marker - * file's base name exists. - */ - loc_wipe(&loc); - loc.inode = inode_new(this->itable); - if (!loc.inode) { - ret = -ENOMEM; - goto err; - } - gf_uuid_parse(entry->d_name, loc.gfid); - ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); - if (!ret) { - gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " - "%s is present. Skipping shard deletion. " - "Returning", - entry->d_name); - ret = 0; - goto delete_marker; - } + local = frame->local; + priv = this->private; - first_block = 1; + cur_block = local->first_block + 1; + last_block = local->last_block; - while (shard_count) { - if (shard_count < local->deletion_rate) { - now = shard_count; - shard_count = 0; - } else { - now = local->deletion_rate; - shard_count -= local->deletion_rate; + /* Determine call count */ + for (i = 1; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + call_count++; } - gf_msg_debug(this->name, 0, "deleting %d shards starting from " - "block %d of gfid %s", - now, first_block, entry->d_name); - ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, - entry); - if (ret) - goto err; - first_block += now; - } + if (!call_count) { + /* Call count = 0 implies that all of the shards that need to be + * unlinked do not exist. So shard xlator would now proceed to + * do the final truncate + size updates. + */ + gf_msg_debug(this->name, 0, + "Shards to be unlinked as part of " + "truncate absent in backend: %s. Directly " + "proceeding to update file size", + uuid_utoa(inode->gfid)); + local->postbuf.ia_size = local->offset; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; + GF_ATOMIC_INIT(local->delta_blocks, 0); + local->hole_size = 0; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } -delete_marker: - loc_wipe(&loc); - loc.inode = inode_ref(inode); - loc.parent = inode_ref(priv->dot_shard_rm_inode); - ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", entry->d_name); - ret = -ENOMEM; - goto err; - } - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to delete %s " - "from /%s", - entry->d_name, GF_SHARD_REMOVE_ME_DIR); -err: - if (xattr_rsp) - dict_unref(xattr_rsp); - loc_wipe(&loc); - return ret; -} - -int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, - gf_dirent_t *entry, inode_t *inode) { - int ret = -1; - loc_t loc = { - 0, - }; - shard_priv_t *priv = NULL; - - priv = this->private; - loc.inode = inode_ref(priv->dot_shard_rm_inode); - - ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); - if (ret < 0) { - if (ret == -EAGAIN) { - ret = 0; - } - goto out; - } - { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } - syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); -out: - loc_wipe(&loc); - return ret; -} - -int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { - SHARD_STACK_DESTROY(frame); - return 0; -} - -int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, - shard_internal_dir_type_t type) { - int ret = 0; - char *bname = NULL; - loc_t *loc = NULL; - shard_priv_t *priv = NULL; - uuid_t gfid = { - 0, - }; - struct iatt stbuf = { - 0, - }; - - priv = this->private; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - loc = &local->dot_shard_loc; - gf_uuid_copy(gfid, priv->dot_shard_gfid); - bname = GF_SHARD_DIR; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - loc = &local->dot_shard_rm_loc; - gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); - bname = GF_SHARD_REMOVE_ME_DIR; - break; - default: - break; - } - - loc->inode = inode_find(this->itable, gfid); - if (!loc->inode) { - ret = shard_init_internal_dir_loc(this, local, type); - if (ret) - goto err; - ret = dict_reset(local->xattr_req); + local->call_count = call_count; + i = 1; + xdata_req = dict_new(); + if (!xdata_req) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to reset " - "dict"); - ret = -ENOMEM; - goto err; - } - ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); - ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, - NULL); - if (ret < 0) { - if (ret != -ENOENT) - gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, - "Lookup on %s failed, exiting", bname); - goto err; - } else { - shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict during truncate of %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + dict_unref(xdata_req); + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } - } - ret = 0; -err: - return ret; -} - -int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, - gf_dirent_t *entry) { - int ret = 0; - loc_t loc = { - 0, - }; - - loc.inode = inode_new(this->itable); - if (!loc.inode) { - ret = -ENOMEM; - goto err; - } - loc.parent = inode_ref(local->fd->inode); - - ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", entry->d_name); - ret = -ENOMEM; - goto err; - } - - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - - ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); - if (ret < 0) { - goto err; - } - entry->inode = inode_ref(loc.inode); - ret = 0; -err: - loc_wipe(&loc); - return ret; -} - -int shard_delete_shards(void *opaque) { - int ret = 0; - off_t offset = 0; - loc_t loc = { - 0, - }; - inode_t *link_inode = NULL; - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *entry = NULL; - call_frame_t *cleanup_frame = NULL; - gf_boolean_t done = _gf_false; - - this = THIS; - priv = this->private; - INIT_LIST_HEAD(&entries.list); - - cleanup_frame = opaque; - - local = mem_get0(this->local_pool); - if (!local) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create local to " - "delete shards"); - ret = -ENOMEM; - goto err; - } - cleanup_frame->local = local; - local->fop = GF_FOP_UNLINK; - - local->xattr_req = dict_new(); - if (!local->xattr_req) { - ret = -ENOMEM; - goto err; - } - local->deletion_rate = priv->deletion_rate; - - ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret == -ENOENT) { - gf_msg_debug(this->name, 0, ".shard absent. Nothing to" - " delete. Exiting"); - ret = 0; - goto err; - } else if (ret < 0) { - goto err; - } - ret = shard_resolve_internal_dir(this, local, - SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); - if (ret == -ENOENT) { - gf_msg_debug(this->name, 0, ".remove_me absent. " - "Nothing to delete. Exiting"); - ret = 0; - goto err; - } else if (ret < 0) { - goto err; - } - - local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); - if (!local->fd) { - ret = -ENOMEM; - goto err; - } - - for (;;) { - offset = 0; + SHARD_SET_ROOT_FS_ID(frame, local); + while (cur_block <= last_block) { + if (!local->inode_list[i]) { + cur_block++; + i++; + continue; + } + if (wind_failed) { + shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + + shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); + bname = strrchr(path, '/') + 1; + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s. Base file gfid = %s", + bname, uuid_utoa(inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + loc.inode = inode_ref(local->inode_list[i]); + + STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, + (void *)(long)cur_block, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); + loc_wipe(&loc); + next: + i++; + cur_block++; + if (!--call_count) + break; + } + dict_unref(xdata_req); + return 0; +} + +int +shard_truncate_do(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->num_blocks == 1) { + /* This means that there are no shards to be unlinked. + * The fop boils down to truncating the last shard, updating + * the size and unwinding. + */ + shard_truncate_last_shard(frame, this, local->inode_list[0]); + return 0; + } else { + shard_truncate_htol(frame, this, local->loc.inode); + } + return 0; +} + +int +shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + shard_truncate_do(frame, this); + return 0; +} + +void +shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + struct iatt *buf) +{ + int list_index = 0; + char block_bname[256] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *linked_inode = NULL; + xlator_t *this = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + inode_t *base_inode = NULL; + + this = THIS; + priv = this->private; + if (local->loc.inode) { + gf_uuid_copy(gfid, local->loc.inode->gfid); + base_inode = local->loc.inode; + } else if (local->resolver_base_inode) { + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + base_inode = local->resolver_base_inode; + } else { + gf_uuid_copy(gfid, local->base_gfid); + } + + shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); + + shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); + linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); + inode_lookup(linked_inode); + list_index = block_num - local->first_block; + local->inode_list[list_index] = linked_inode; + LOCK(&priv->lock); { - if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { - priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; - } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { - priv->bg_del_state = SHARD_BG_DELETION_NONE; - done = _gf_true; - } + fsync_inode = __shard_update_shards_inode_list( + linked_inode, this, base_inode, block_num, gfid); } UNLOCK(&priv->lock); - if (done) - break; - while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, - &entries, local->xattr_req, NULL))) { - if (ret > 0) - ret = 0; - list_for_each_entry(entry, &entries.list, list) { - offset = entry->d_off; + if (fsync_inode) + shard_initiate_evicted_inode_fsync(this, fsync_inode); +} + +int +shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + int call_count = 0; + int shard_block_num = (long)cookie; + uuid_t gfid = { + 0, + }; + shard_local_t *local = NULL; + + local = frame->local; + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + if (op_ret < 0) { + /* Ignore absence of shards in the backend in truncate fop. */ + switch (local->fop) { + case GF_FOP_TRUNCATE: + case GF_FOP_FTRUNCATE: + case GF_FOP_RENAME: + case GF_FOP_UNLINK: + if (op_errno == ENOENT) + goto done; + break; + case GF_FOP_WRITE: + case GF_FOP_READ: + case GF_FOP_ZEROFILL: + case GF_FOP_DISCARD: + case GF_FOP_FALLOCATE: + if ((!local->first_lookup_done) && (op_errno == ENOENT)) { + LOCK(&frame->lock); + { + local->create_count++; + } + UNLOCK(&frame->lock); + goto done; + } + break; + default: + break; + } + + /* else */ + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_LOOKUP_SHARD_FAILED, + "Lookup on shard %d " + "failed. Base file gfid = %s", + shard_block_num, uuid_utoa(gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } + + shard_link_block_inode(local, shard_block_num, inode, buf); + +done: + if (local->lookup_shards_barriered) { + syncbarrier_wake(&local->barrier); + return 0; + } else { + call_count = shard_call_count_return(frame); + if (call_count == 0) { + if (!local->first_lookup_done) + local->first_lookup_done = _gf_true; + local->pls_fop_handler(frame, this); + } + } + return 0; +} + +dict_t * +shard_create_gfid_dict(dict_t *dict) +{ + int ret = 0; + dict_t *new = NULL; + unsigned char *gfid = NULL; + + new = dict_copy_with_ref(dict, NULL); + if (!new) + return NULL; + + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); + if (!gfid) { + ret = -1; + goto out; + } + + gf_uuid_generate(gfid); + + ret = dict_set_gfuuid(new, "gfid-req", gfid, false); - if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) - continue; +out: + if (ret) { + dict_unref(new); + new = NULL; + GF_FREE(gfid); + } + + return new; +} + +int +shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, + shard_post_lookup_shards_fop_handler_t handler) +{ + int i = 0; + int ret = 0; + int count = 0; + int call_count = 0; + int32_t shard_idx_iter = 0; + int last_block = 0; + char path[PATH_MAX] = { + 0, + }; + char *bname = NULL; + uuid_t gfid = { + 0, + }; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + gf_boolean_t wind_failed = _gf_false; + dict_t *xattr_req = NULL; - if (!entry->inode) { - ret = shard_lookup_marker_entry(this, local, entry); - if (ret < 0) + priv = this->private; + local = frame->local; + count = call_count = local->call_count; + shard_idx_iter = local->first_block; + last_block = local->last_block; + local->pls_fop_handler = handler; + if (local->lookup_shards_barriered) + local->barrier.waitfor = local->call_count; + + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { + i++; + shard_idx_iter++; continue; } - link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, - &entry->d_stat); - gf_msg_debug(this->name, 0, "Initiating deletion of " - "shards of gfid %s", - entry->d_name); - ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, - link_inode); - inode_unlink(link_inode, local->fd->inode, entry->d_name); - inode_unref(link_inode); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to clean up shards of gfid %s", entry->d_name); - continue; + if (wind_failed) { + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, + this, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto next; } - gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, - "Deleted " - "shards of gfid=%s from backend", - entry->d_name); - } - gf_dirent_free(&entries); - if (ret) - break; - } - } - ret = 0; - loc_wipe(&loc); - return ret; + + shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); + + bname = strrchr(path, '/') + 1; + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s, base file gfid = %s", + bname, uuid_utoa(gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, + this, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto next; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + loc_wipe(&loc); + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, + this, -1, ENOMEM, NULL, NULL, NULL, + NULL); + goto next; + } + + STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, + (void *)(long)shard_idx_iter, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); + next: + shard_idx_iter++; + i++; + + if (!--call_count) + break; + } + if (local->lookup_shards_barriered) { + syncbarrier_wait(&local->barrier, count); + local->pls_fop_handler(frame, this); + } + return 0; +} + +int +shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + if (local->op_errno == ENOENT) { + /* If lookup on /.shard fails with ENOENT, it means that + * the file was 0-byte in size but truncated sometime in + * the past to a higher size which is reflected in the + * size xattr, and now being truncated to a lower size. + * In this case, the only thing that needs to be done is + * to update the size xattr of the file and unwind. + */ + local->first_block = local->last_block = 0; + local->num_blocks = 1; + local->call_count = 0; + local->op_ret = 0; + local->postbuf.ia_size = local->offset; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } else { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + } + + if (!local->call_count) + shard_truncate_do(frame, this); + else + shard_common_lookup_shards(frame, this, local->loc.inode, + shard_post_lookup_shards_truncate_handler); + + return 0; +} + +int +shard_truncate_begin(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + /* First participant block here is the lowest numbered block that would + * hold the last byte of the file post successful truncation. + * Last participant block is the block that contains the last byte in + * the current state of the file. + * If (first block == last_block): + * then that means that the file only needs truncation of the + * first (or last since both are same) block. + * Else + * if (new_size % block_size == 0) + * then that means there is no truncate to be done with + * only shards from first_block + 1 through the last + * block needing to be unlinked. + * else + * both truncate of the first block and unlink of the + * remaining shards until end of file is required. + */ + local->first_block = (local->offset == 0) + ? 0 + : get_lowest_block(local->offset - 1, + local->block_size); + local->last_block = get_highest_block(0, local->prebuf.ia_size, + local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) + ? local->loc.inode + : local->fd->inode; + + if ((local->first_block == 0) && (local->num_blocks == 1)) { + if (local->fop == GF_FOP_TRUNCATE) + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, + local->offset, local->xattr_req); + else + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, local->fd, + local->offset, local->xattr_req); + return 0; + } + + local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) + goto err; + + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + ret = shard_init_internal_dir_loc(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; + shard_lookup_internal_dir(frame, this, + shard_post_resolve_truncate_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_truncate_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; err: - LOCK(&priv->lock); - { priv->bg_del_state = SHARD_BG_DELETION_NONE; } - UNLOCK(&priv->lock); - loc_wipe(&loc); - return ret; -} - -int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) { - if (op_ret) - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Unlock failed. Please check brick logs for " - "more details"); - SHARD_STACK_DESTROY(frame); - return 0; -} - -int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { - loc_t *loc = NULL; - call_frame_t *lk_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *lk_local = NULL; - shard_inodelk_t *lock = NULL; - - local = frame->local; - lk_frame = local->inodelk_frame; - lk_local = lk_frame->local; - local->inodelk_frame = NULL; - loc = &local->int_inodelk.loc; - lock = &lk_local->int_inodelk; - lock->flock.l_type = F_UNLCK; - - STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, - &lock->flock, NULL); - local->int_inodelk.acquired_lock = _gf_false; - return 0; -} - -int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata); -int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { - int ret = 0; - loc_t *dst_loc = NULL; - loc_t tmp_loc = { - 0, - }; - shard_local_t *local = frame->local; - - if (local->dst_block_size) { - tmp_loc.parent = inode_ref(local->loc2.parent); - ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on pargfid=%s bname=%s", - uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - - tmp_loc.name = strrchr(tmp_loc.path, '/'); - if (tmp_loc.name) - tmp_loc.name++; - dst_loc = &tmp_loc; - } else { - dst_loc = &local->loc2; - } - - /* To-Do: Request open-fd count on dst base file */ - STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, - local->xattr_req); - loc_wipe(&tmp_loc); - return 0; -err: - loc_wipe(&tmp_loc); - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; -} - -int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); - -int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, - dict_t *xdata) { - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Xattrop on marker file failed " - "while performing %s; entry gfid=%s", - gf_fop_string(local->fop), local->newloc.name); - goto err; - } - - inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, - local->newloc.name); - - if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file(frame, this); - else if (local->fop == GF_FOP_RENAME) - shard_rename_src_base_file(frame, this); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); - return 0; -} - -int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { - int op_errno = ENOMEM; - uint64_t bs = 0; - dict_t *xdata = NULL; - shard_local_t *local = NULL; - - local = frame->local; - xdata = dict_new(); - if (!xdata) - goto err; - - if (local->fop == GF_FOP_UNLINK) - bs = local->block_size; - else if (local->fop == GF_FOP_RENAME) - bs = local->dst_block_size; - SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, - local->prebuf.ia_size, 0, err); - STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, &local->newloc, - GF_XATTROP_GET_AND_SET, xdata, NULL); - dict_unref(xdata); - return 0; -err: - if (xdata) - dict_unref(xdata); - shard_common_failure_unwind(local->fop, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } -int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) { - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - local = frame->local; - priv = this->private; - - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Lookup on marker file failed " - "while performing %s; entry gfid=%s", - gf_fop_string(local->fop), local->newloc.name); - goto err; - } - - linked_inode = - inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); - inode_unref(local->newloc.inode); - local->newloc.inode = linked_inode; - shard_set_size_attrs_on_marker_file(frame, this); - return 0; +int +shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + struct iatt tmp_stbuf = { + 0, + }; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + local->postbuf = tmp_stbuf = local->prebuf; + + if (local->prebuf.ia_size == local->offset) { + /* If the file size is same as requested size, unwind the call + * immediately. + */ + if (local->fop == GF_FOP_TRUNCATE) + SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, + &local->postbuf, NULL); + else + SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, + &local->postbuf, NULL); + } else if (local->offset > local->prebuf.ia_size) { + /* If the truncate is from a lower to a higher size, set the + * new size xattr and unwind. + */ + local->hole_size = local->offset - local->prebuf.ia_size; + local->delta_size = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); + local->postbuf.ia_size = local->offset; + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + } else { + /* ... else + * i. unlink all shards that need to be unlinked. + * ii. truncate the last of the shards. + * iii. update the new size using setxattr. + * and unwind the fop. + */ + local->hole_size = 0; + local->delta_size = (local->offset - local->prebuf.ia_size); + GF_ATOMIC_INIT(local->delta_blocks, 0); + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); + shard_truncate_begin(frame, this); + } + return 0; +} + +/* TO-DO: + * Fix updates to size and block count with racing write(s) and truncate(s). + */ + +int +shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; + } + + if (!this->itable) + this->itable = loc->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + loc_copy(&local->loc, loc); + local->offset = offset; + local->block_size = block_size; + local->fop = GF_FOP_TRUNCATE; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->resolver_base_inode = loc->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_truncate_handler); + return 0; + err: - shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); + return 0; } -int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { - int op_errno = ENOMEM; - dict_t *xattr_req = NULL; - shard_local_t *local = NULL; +int +shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + local->fd = fd_ref(fd); + local->offset = offset; + local->block_size = block_size; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_FTRUNCATE; + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + local->resolver_base_inode = fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_truncate_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); + return 0; +} + +int +shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) + goto unwind; + + ret = shard_inode_ctx_set(inode, this, buf, local->block_size, + SHARD_ALL_MASK); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, + "Failed to set inode " + "ctx for %s", + uuid_utoa(inode->gfid)); + +unwind: + SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + + return 0; +} + +int +shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->block_size = priv->block_size; + if (!__is_gsyncd_on_shard_dir(frame, loc)) { + SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); + } + + STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; +err: + shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); + return 0; +} + +int32_t +shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + shard_local_t *local = NULL; + + local = frame->local; + if (op_ret < 0) + goto err; + + shard_inode_ctx_set(inode, this, buf, 0, + SHARD_MASK_NLINK | SHARD_MASK_TIMES); + buf->ia_size = local->prebuf.ia_size; + buf->ia_blocks = local->prebuf.ia_blocks; + + SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; +err: + shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); + return 0; +} + +int +shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, + NULL, NULL, NULL, NULL); + return 0; + } + + STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, + local->xattr_req); + return 0; +} + +int32_t +shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(oldloc->inode->gfid)); + goto err; + } + + if (!block_size) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; + } + + if (!this->itable) + this->itable = oldloc->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_link_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); + return 0; +} + +int +shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); + +int +shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + local = frame->local; + + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { + gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, + "failed to delete shards of %s", uuid_utoa(gfid)); + return 0; + } + local->op_ret = 0; + local->op_errno = 0; + + shard_unlink_shards_do(frame, this, local->resolver_base_inode); + return 0; +} + +int +shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + local->lookup_shards_barriered = _gf_true; + + if (!local->call_count) + shard_unlink_shards_do(frame, this, local->resolver_base_inode); + else + shard_common_lookup_shards(frame, this, local->resolver_base_inode, + shard_post_lookup_shards_unlink_handler); + return 0; +} + +void +shard_unlink_block_inode(shard_local_t *local, int shard_block_num) +{ + char block_bname[256] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *inode = NULL; + inode_t *base_inode = NULL; + xlator_t *this = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + int unref_base_inode = 0; + int unref_shard_inode = 0; + + this = THIS; + priv = this->private; + + inode = local->inode_list[shard_block_num - local->first_block]; + shard_inode_ctx_get(inode, this, &ctx); + base_inode = ctx->base_inode; + if (base_inode) + gf_uuid_copy(gfid, base_inode->gfid); + else + gf_uuid_copy(gfid, ctx->base_gfid); + shard_make_block_bname(shard_block_num, gfid, block_bname, + sizeof(block_bname)); + + LOCK(&priv->lock); + if (base_inode) + LOCK(&base_inode->lock); + LOCK(&inode->lock); + { + __shard_inode_ctx_get(inode, this, &ctx); + if (!list_empty(&ctx->ilist)) { + list_del_init(&ctx->ilist); + priv->inode_count--; + unref_base_inode++; + unref_shard_inode++; + GF_ASSERT(priv->inode_count >= 0); + } + if (ctx->fsync_needed) { + unref_base_inode++; + unref_shard_inode++; + list_del_init(&ctx->to_fsync_list); + if (base_inode) { + __shard_inode_ctx_get(base_inode, this, &base_ictx); + base_ictx->fsync_count--; + } + } + } + UNLOCK(&inode->lock); + if (base_inode) + UNLOCK(&base_inode->lock); + + inode_unlink(inode, priv->dot_shard_inode, block_bname); + inode_ref_reduce_by_n(inode, unref_shard_inode); + inode_forget(inode, 0); + + if (base_inode && unref_base_inode) + inode_ref_reduce_by_n(base_inode, unref_base_inode); + UNLOCK(&priv->lock); +} + +int +shard_rename_cbk(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->preoldparent, + &local->postoldparent, &local->prenewparent, + &local->postnewparent, local->xattr_rsp); + return 0; +} + +int32_t +shard_unlink_cbk(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = frame->local; + + SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preoldparent, &local->postoldparent, + local->xattr_rsp); + return 0; +} + +int +shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int shard_block_num = (long)cookie; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } + + shard_unlink_block_inode(local, shard_block_num); +done: + syncbarrier_wake(&local->barrier); + return 0; +} + +int +shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) +{ + int i = 0; + int ret = -1; + int count = 0; + uint32_t cur_block = 0; + uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ + char *bname = NULL; + char path[PATH_MAX] = { + 0, + }; + uuid_t gfid = { + 0, + }; + loc_t loc = { + 0, + }; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + for (i = 0; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + count++; + } + + if (!count) { + /* callcount = 0 implies that all of the shards that need to be + * unlinked are non-existent (in other words the file is full of + * holes). + */ + gf_msg_debug(this->name, 0, + "All shards that need to be " + "unlinked are non-existent: %s", + uuid_utoa(gfid)); + return 0; + } + + SHARD_SET_ROOT_FS_ID(frame, local); + local->barrier.waitfor = count; + cur_block = cur_block_idx + local->first_block; + + while (cur_block_idx < local->num_blocks) { + if (!local->inode_list[cur_block_idx]) + goto next; + + if (wind_failed) { + shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + + shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); + bname = strrchr(path, '/') + 1; + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s, base file gfid = %s", + bname, uuid_utoa(gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + loc.inode = inode_ref(local->inode_list[cur_block_idx]); + + STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, + (void *)(long)cur_block, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, + local->xattr_req); + loc_wipe(&loc); + next: + cur_block++; + cur_block_idx++; + } + syncbarrier_wait(&local->barrier, count); + SHARD_UNSET_ROOT_FS_ID(frame, local); + return 0; +} + +int +shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, + int now, int first_block, gf_dirent_t *entry) +{ + int i = 0; + int ret = 0; + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + local = cleanup_frame->local; + + local->inode_list = GF_CALLOC(now, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) + return -ENOMEM; + + local->first_block = first_block; + local->last_block = first_block + now - 1; + local->num_blocks = now; + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(local->base_gfid, gfid); + local->resolver_base_inode = inode_find(this->itable, gfid); + local->call_count = 0; + ret = syncbarrier_init(&local->barrier); + if (ret) { + GF_FREE(local->inode_list); + local->inode_list = NULL; + inode_unref(local->resolver_base_inode); + local->resolver_base_inode = NULL; + return -errno; + } + shard_common_resolve_shards(cleanup_frame, this, + shard_post_resolve_unlink_handler); + + for (i = 0; i < local->num_blocks; i++) { + if (local->inode_list[i]) + inode_unref(local->inode_list[i]); + } + GF_FREE(local->inode_list); + local->inode_list = NULL; + if (local->op_ret) + ret = -local->op_errno; + syncbarrier_destroy(&local->barrier); + inode_unref(local->resolver_base_inode); + local->resolver_base_inode = NULL; + STACK_RESET(cleanup_frame->root); + return ret; +} + +int +__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, + gf_dirent_t *entry, inode_t *inode) +{ + int ret = 0; + int shard_count = 0; + int first_block = 0; + int now = 0; + uint64_t size = 0; + uint64_t block_size = 0; + uint64_t size_array[4] = { + 0, + }; + void *bsize = NULL; + void *size_attr = NULL; + dict_t *xattr_rsp = NULL; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = cleanup_frame->local; + ret = dict_reset(local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to reset dict"); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); + ret = -ENOMEM; + goto err; + } + + loc.inode = inode_ref(inode); + loc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, + &xattr_rsp); + if (ret) + goto err; + + ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); + goto err; + } + block_size = ntoh64(*((uint64_t *)bsize)); + + ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); + goto err; + } + + memcpy(size_array, size_attr, sizeof(size_array)); + size = ntoh64(size_array[0]); + + shard_count = (size / block_size) - 1; + if (shard_count < 0) { + gf_msg_debug(this->name, 0, + "Size of %s hasn't grown beyond " + "its shard-block-size. Nothing to delete. " + "Returning", + entry->d_name); + /* File size < shard-block-size, so nothing to delete */ + ret = 0; + goto delete_marker; + } + if ((size % block_size) > 0) + shard_count++; + + if (shard_count == 0) { + gf_msg_debug(this->name, 0, + "Size of %s is exactly equal to " + "its shard-block-size. Nothing to delete. " + "Returning", + entry->d_name); + ret = 0; + goto delete_marker; + } + gf_msg_debug(this->name, 0, + "base file = %s, " + "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 + ", " + "shard_count=%d", + entry->d_name, block_size, size, shard_count); + + /* Perform a gfid-based lookup to see if gfid corresponding to marker + * file's base name exists. + */ + loc_wipe(&loc); + loc.inode = inode_new(this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto err; + } + gf_uuid_parse(entry->d_name, loc.gfid); + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); + if (!ret) { + gf_msg_debug(this->name, 0, + "Base shard corresponding to gfid " + "%s is present. Skipping shard deletion. " + "Returning", + entry->d_name); + ret = 0; + goto delete_marker; + } + + first_block = 1; + + while (shard_count) { + if (shard_count < local->deletion_rate) { + now = shard_count; + shard_count = 0; + } else { + now = local->deletion_rate; + shard_count -= local->deletion_rate; + } + + gf_msg_debug(this->name, 0, + "deleting %d shards starting from " + "block %d of gfid %s", + now, first_block, entry->d_name); + ret = shard_regulated_shards_deletion(cleanup_frame, this, now, + first_block, entry); + if (ret) + goto err; + first_block += now; + } + +delete_marker: + loc_wipe(&loc); + loc.inode = inode_ref(inode); + loc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to delete %s " + "from /%s", + entry->d_name, GF_SHARD_REMOVE_ME_DIR); +err: + if (xattr_rsp) + dict_unref(xattr_rsp); + loc_wipe(&loc); + return ret; +} + +int +shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, + gf_dirent_t *entry, inode_t *inode) +{ + int ret = -1; + loc_t loc = { + 0, + }; + shard_priv_t *priv = NULL; + + priv = this->private; + loc.inode = inode_ref(priv->dot_shard_rm_inode); + + ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + if (ret == -EAGAIN) { + ret = 0; + } + goto out; + } + { + ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); + } + syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +out: + loc_wipe(&loc); + return ret; +} + +int +shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) +{ + SHARD_STACK_DESTROY(frame); + return 0; +} + +int +shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) +{ + int ret = 0; + char *bname = NULL; + loc_t *loc = NULL; + shard_priv_t *priv = NULL; + uuid_t gfid = { + 0, + }; + struct iatt stbuf = { + 0, + }; + + priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + loc = &local->dot_shard_loc; + gf_uuid_copy(gfid, priv->dot_shard_gfid); + bname = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + loc = &local->dot_shard_rm_loc; + gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); + bname = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; + } + + loc->inode = inode_find(this->itable, gfid); + if (!loc->inode) { + ret = shard_init_internal_dir_loc(this, local, type); + if (ret) + goto err; + ret = dict_reset(local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to reset " + "dict"); + ret = -ENOMEM; + goto err; + } + ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, + local->xattr_req, NULL); + if (ret < 0) { + if (ret != -ENOENT) + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Lookup on %s failed, exiting", bname); + goto err; + } else { + shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); + } + } + ret = 0; +err: + return ret; +} + +int +shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, + gf_dirent_t *entry) +{ + int ret = 0; + loc_t loc = { + 0, + }; + + loc.inode = inode_new(this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto err; + } + loc.parent = inode_ref(local->fd->inode); + + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); + if (ret < 0) { + goto err; + } + entry->inode = inode_ref(loc.inode); + ret = 0; +err: + loc_wipe(&loc); + return ret; +} + +int +shard_delete_shards(void *opaque) +{ + int ret = 0; + off_t offset = 0; + loc_t loc = { + 0, + }; + inode_t *link_inode = NULL; + xlator_t *this = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + call_frame_t *cleanup_frame = NULL; + gf_boolean_t done = _gf_false; + + this = THIS; + priv = this->private; + INIT_LIST_HEAD(&entries.list); + + cleanup_frame = opaque; + + local = mem_get0(this->local_pool); + if (!local) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create local to " + "delete shards"); + ret = -ENOMEM; + goto err; + } + cleanup_frame->local = local; + local->fop = GF_FOP_UNLINK; + + local->xattr_req = dict_new(); + if (!local->xattr_req) { + ret = -ENOMEM; + goto err; + } + local->deletion_rate = priv->deletion_rate; + + ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret == -ENOENT) { + gf_msg_debug(this->name, 0, + ".shard absent. Nothing to" + " delete. Exiting"); + ret = 0; + goto err; + } else if (ret < 0) { + goto err; + } + + ret = shard_resolve_internal_dir(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + if (ret == -ENOENT) { + gf_msg_debug(this->name, 0, + ".remove_me absent. " + "Nothing to delete. Exiting"); + ret = 0; + goto err; + } else if (ret < 0) { + goto err; + } + + local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); + if (!local->fd) { + ret = -ENOMEM; + goto err; + } + + for (;;) { + offset = 0; + LOCK(&priv->lock); + { + if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { + priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; + } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + done = _gf_true; + } + } + UNLOCK(&priv->lock); + if (done) + break; + while ( + (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, + &entries, local->xattr_req, NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry(entry, &entries.list, list) + { + offset = entry->d_off; + + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; + + if (!entry->inode) { + ret = shard_lookup_marker_entry(this, local, entry); + if (ret < 0) + continue; + } + link_inode = inode_link(entry->inode, local->fd->inode, + entry->d_name, &entry->d_stat); + + gf_msg_debug(this->name, 0, + "Initiating deletion of " + "shards of gfid %s", + entry->d_name); + ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, + link_inode); + inode_unlink(link_inode, local->fd->inode, entry->d_name); + inode_unref(link_inode); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to clean up shards of gfid %s", + entry->d_name); + continue; + } + gf_msg(this->name, GF_LOG_INFO, 0, + SHARD_MSG_SHARD_DELETION_COMPLETED, + "Deleted " + "shards of gfid=%s from backend", + entry->d_name); + } + gf_dirent_free(&entries); + if (ret) + break; + } + } + ret = 0; + loc_wipe(&loc); + return ret; + +err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); + loc_wipe(&loc); + return ret; +} + +int +shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY(frame); + return 0; +} + +int +shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) +{ + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *lock = NULL; + + local = frame->local; + lk_frame = local->inodelk_frame; + lk_local = lk_frame->local; + local->inodelk_frame = NULL; + loc = &local->int_inodelk.loc; + lock = &lk_local->int_inodelk; + lock->flock.l_type = F_UNLCK; + + STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, + &lock->flock, NULL); + local->int_inodelk.acquired_lock = _gf_false; + return 0; +} + +int +shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata); +int +shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + loc_t *dst_loc = NULL; + loc_t tmp_loc = { + 0, + }; + shard_local_t *local = frame->local; + + if (local->dst_block_size) { + tmp_loc.parent = inode_ref(local->loc2.parent); + ret = inode_path(tmp_loc.parent, local->loc2.name, + (char **)&tmp_loc.path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on pargfid=%s bname=%s", + uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + tmp_loc.name = strrchr(tmp_loc.path, '/'); + if (tmp_loc.name) + tmp_loc.name++; + dst_loc = &tmp_loc; + } else { + dst_loc = &local->loc2; + } + + /* To-Do: Request open-fd count on dst base file */ + STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, + local->xattr_req); + loc_wipe(&tmp_loc); + return 0; +err: + loc_wipe(&tmp_loc); + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +} + +int +shard_unlink_base_file(call_frame_t *frame, xlator_t *this); + +int +shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Xattrop on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } + + inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, + local->newloc.name); + + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); + return 0; +} + +int +shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) +{ + int op_errno = ENOMEM; + uint64_t bs = 0; + dict_t *xdata = NULL; + shard_local_t *local = NULL; + + local = frame->local; + xdata = dict_new(); + if (!xdata) + goto err; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, + local->prebuf.ia_size, 0, err); + STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, + &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); + dict_unref(xdata); + return 0; +err: + if (xdata) + dict_unref(xdata); + shard_common_failure_unwind(local->fop, frame, -1, op_errno); + return 0; +} + +int +shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Lookup on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } + + linked_inode = inode_link(inode, priv->dot_shard_rm_inode, + local->newloc.name, buf); + inode_unref(local->newloc.inode); + local->newloc.inode = linked_inode; + shard_set_size_attrs_on_marker_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); + return 0; +} + +int +shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) +{ + int op_errno = ENOMEM; + dict_t *xattr_req = NULL; + shard_local_t *local = NULL; + + local = frame->local; + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) + goto err; + + STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); + dict_unref(xattr_req); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, op_errno); + return 0; +} + +int +shard_create_marker_file_under_remove_me_cbk( + call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (op_ret < 0) { + if ((op_errno != EEXIST) && (op_errno != ENODATA)) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Marker file creation " + "failed while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } else { + shard_lookup_marker_file(frame, this); + return 0; + } + } + + linked_inode = inode_link(inode, priv->dot_shard_rm_inode, + local->newloc.name, buf); + inode_unref(local->newloc.inode); + local->newloc.inode = linked_inode; + + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; +} + +int +shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, + loc_t *loc) +{ + int ret = 0; + int op_errno = ENOMEM; + uint64_t bs = 0; + char g1[64] = { + 0, + }; + char g2[64] = { + 0, + }; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + SHARD_SET_ROOT_FS_ID(frame, local); + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) + goto err; + + local->newloc.inode = inode_new(this->itable); + local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), + (char **)&local->newloc.path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on " + "pargfid=%s bname=%s", + uuid_utoa_r(priv->dot_shard_rm_gfid, g1), + uuid_utoa_r(loc->inode->gfid, g2)); + goto err; + } + local->newloc.name = strrchr(local->newloc.path, '/'); + if (local->newloc.name) + local->newloc.name++; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + + SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, + local->prebuf.ia_size, 0, err); + + STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + &local->newloc, 0, 0, 0644, xattr_req); + dict_unref(xattr_req); + return 0; + +err: + if (xattr_req) + dict_unref(xattr_req); + shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, + NULL, NULL, NULL, NULL, NULL); + return 0; +} + +int +shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); + +int +shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) +{ + int ret = 0; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); + local->preoldparent = *preparent; + local->postoldparent = *postparent; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + if (local->cleanup_required) + shard_start_background_deletion(this); + } + + if (local->entrylk_frame) { + ret = shard_unlock_entrylk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + } + + ret = shard_unlock_inodelk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + + shard_unlink_cbk(frame, this); + return 0; +} + +int +shard_unlink_base_file(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = frame->local; + + /* To-Do: Request open-fd count on base file */ + STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, + local->xattr_req); + return 0; +} + +int +shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY(frame); + return 0; +} + +int +shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) +{ + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_entrylk_t *lock = NULL; + + local = frame->local; + lk_frame = local->entrylk_frame; + lk_local = lk_frame->local; + local->entrylk_frame = NULL; + lock = &lk_local->int_entrylk; + loc = &lock->loc; + + STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, loc, + lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, + NULL); + local->int_entrylk.acquired_lock = _gf_false; + return 0; +} + +int +shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_create_marker_file_under_remove_me(frame, this, + &local->int_inodelk.loc); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-entrylk handler not defined. This case should not" + " be hit"); + break; + } + return 0; +} + +int +shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; + + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(main_local->fop, main_frame, op_ret, + op_errno); + return 0; + } + main_local->int_entrylk.acquired_lock = _gf_true; + shard_post_entrylk_fop_handler(main_frame, this); + return 0; +} + +int +shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, + uuid_t gfid) +{ + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; + shard_local_t *local = NULL; + shard_local_t *entrylk_local = NULL; + shard_entrylk_t *int_entrylk = NULL; + call_frame_t *entrylk_frame = NULL; + + local = frame->local; + entrylk_frame = create_frame(this, this->ctx->pool); + if (!entrylk_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to lock marker file"); + goto err; + } + + entrylk_local = mem_get0(this->local_pool); + if (!entrylk_local) { + STACK_DESTROY(entrylk_frame->root); + goto err; + } + + entrylk_frame->local = entrylk_local; + entrylk_local->main_frame = frame; + int_entrylk = &entrylk_local->int_entrylk; + + int_entrylk->loc.inode = inode_ref(inode); + set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); + local->entrylk_frame = entrylk_frame; + gf_uuid_unparse(gfid, gfid_str); + int_entrylk->basename = gf_strdup(gfid_str); + + STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, + int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; +} + +int +shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; + } + + if (local->prebuf.ia_nlink > 1) { + gf_msg_debug(this->name, 0, + "link count on %s > 1:%d, " + "performing rename()/unlink()", + local->int_inodelk.loc.path, local->prebuf.ia_nlink); + if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + else if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + } else { + gf_msg_debug(this->name, 0, + "link count on %s = 1, creating " + "file under .remove_me", + local->int_inodelk.loc.path); + local->cleanup_required = _gf_true; + shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, + local->prebuf.ia_gfid); + } + return 0; +} + +int +shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_lookup_base_file(frame, this, &local->int_inodelk.loc, + shard_post_lookup_base_shard_rm_handler); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-inodelk handler not defined. This case should not" + " be hit"); + break; + } + return 0; +} + +int +shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; + + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(main_local->fop, main_frame, op_ret, + op_errno); + return 0; + } + main_local->int_inodelk.acquired_lock = _gf_true; + shard_post_inodelk_fop_handler(main_frame, this); + return 0; +} + +int +shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *int_inodelk = NULL; + + local = frame->local; + lk_frame = create_frame(this, this->ctx->pool); + if (!lk_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to lock base shard"); + goto err; + } + lk_local = mem_get0(this->local_pool); + if (!lk_local) { + STACK_DESTROY(lk_frame->root); + goto err; + } + + lk_frame->local = lk_local; + lk_local->main_frame = frame; + int_inodelk = &lk_local->int_inodelk; + + int_inodelk->flock.l_len = 0; + int_inodelk->flock.l_start = 0; + int_inodelk->domain = this->name; + int_inodelk->flock.l_type = F_WRLCK; + loc_copy(&local->int_inodelk.loc, loc); + set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); + local->inodelk_frame = lk_frame; + + STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, + &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; +} + +int +shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +{ + loc_t *loc = NULL; + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; + } + if (local->fop == GF_FOP_UNLINK) + loc = &local->loc; + else if (local->fop == GF_FOP_RENAME) + loc = &local->loc2; + shard_acquire_inodelk(frame, this, loc); + return 0; +} + +int +shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type); +int +shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; + } + shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + return 0; +} + +void +shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + local->dot_shard_rm_loc.inode = inode_find(this->itable, + priv->dot_shard_rm_gfid); + if (!local->dot_shard_rm_loc.inode) { + local->dot_shard_loc.inode = inode_find(this->itable, + priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_pre_mkdir_rm_handler; + shard_refresh_internal_dir(frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD); + } + } else { + local->post_res_handler = shard_post_mkdir_rm_handler; + shard_refresh_internal_dir(frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + } +} + +int +shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy(&local->loc, loc); + local->xflag = xflag; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + local->block_size = block_size; + local->resolver_base_inode = loc->inode; + local->fop = GF_FOP_UNLINK; + if (!this->itable) + this->itable = (local->loc.inode)->table; + + local->resolve_not = _gf_true; + shard_begin_rm_resolution(frame, this); + return 0; +err: + shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); + return 0; +} + +int +shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) +{ + shard_rename_cbk(frame, this); + return 0; +} + +int +shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + int ret = 0; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + /* Set ctx->refresh to TRUE to force a lookup on disk when + * shard_lookup_base_file() is called next to refresh the hard link + * count in ctx. Note that this is applicable only to the case where + * the rename dst is already existent and sharded. + */ + if ((local->dst_block_size) && (!local->cleanup_required)) + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); + + local->prebuf = *buf; + local->preoldparent = *preoldparent; + local->postoldparent = *postoldparent; + local->prenewparent = *prenewparent; + local->postnewparent = *postnewparent; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + + if (local->dst_block_size) { + if (local->entrylk_frame) { + ret = shard_unlock_entrylk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + } + + ret = shard_unlock_inodelk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + goto err; + } + if (local->cleanup_required) + shard_start_background_deletion(this); + } + + /* Now the base file of src, if sharded, is looked up to gather ia_size + * and ia_blocks.*/ + if (local->block_size) { + local->tmp_loc.inode = inode_new(this->itable); + gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); + shard_lookup_base_file(frame, this, &local->tmp_loc, + shard_post_rename_lookup_handler); + } else { + shard_rename_cbk(frame, this); + } + return 0; +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +} + +int +shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + /* Save dst base file attributes into postbuf so the information is not + * lost when it is overwritten after lookup on base file of src in + * shard_lookup_base_file_cbk(). + */ + local->postbuf = local->prebuf; + shard_rename_src_base_file(frame, this); + return 0; +} + +int +shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + uint64_t dst_block_size = 0; + shard_local_t *local = NULL; + + if (IA_ISDIR(oldloc->inode->ia_type)) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); + if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(oldloc->inode->gfid)); + goto err; + } + + if (newloc->inode) + ret = shard_inode_ctx_get_block_size(newloc->inode, this, + &dst_block_size); + + /* The following stack_wind covers the case where: + * a. the src file is not sharded and dst doesn't exist, OR + * b. the src and dst both exist but are not sharded. + */ + if (((!block_size) && (!dst_block_size)) || + frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + local->resolver_base_inode = newloc->inode; + local->fop = GF_FOP_RENAME; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + local->block_size = block_size; + local->dst_block_size = dst_block_size; + if (!this->itable) + this->itable = (local->loc.inode)->table; + local->resolve_not = _gf_true; + + /* The following if-block covers the case where the dst file exists + * and is sharded. + */ + if (local->dst_block_size) { + shard_begin_rm_resolution(frame, this); + } else { + /* The following block covers the case where the dst either doesn't + * exist or is NOT sharded but the src is sharded. In this case, shard + * xlator would go ahead and rename src to dst. Once done, it would also + * lookup the base shard of src to get the ia_size and ia_blocks xattr + * values. + */ + shard_rename_src_base_file(frame, this); + } + return 0; + +err: + shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); + return 0; +} + +int +shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int ret = -1; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) + goto unwind; + + ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, + SHARD_ALL_MASK); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, + "Failed to set inode " + "ctx for %s", + uuid_utoa(inode->gfid)); + +unwind: + SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); + return 0; +} + +int +shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + local->block_size = priv->block_size; + + if (!__is_gsyncd_on_shard_dir(frame, loc)) { + SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); + } + + STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +err: + shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); + return 0; +} + +int +shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + /* To-Do: Handle open with O_TRUNC under locks */ + SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int +shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int +shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) +{ + int i = 0; + int call_count = 0; + void *address = NULL; + uint64_t block_num = 0; + off_t off = 0; + struct iovec vec = { + 0, + }; + shard_local_t *local = NULL; + fd_t *anon_fd = cookie; + shard_inode_ctx_t *ctx = NULL; + + local = frame->local; + + /* If shard has already seen a failure here before, there is no point + * in aggregating subsequent reads, so just go to out. + */ + if (local->op_ret < 0) + goto out; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto out; + } + + if (local->op_ret >= 0) + local->op_ret += op_ret; + + shard_inode_ctx_get(anon_fd->inode, this, &ctx); + block_num = ctx->block_num; + + if (block_num == local->first_block) { + address = local->iobuf->ptr; + } else { + /* else + * address to start writing to = beginning of buffer + + * number of bytes until end of first block + + * + block_size times number of blocks + * between the current block and the first + */ + address = (char *)local->iobuf->ptr + + (local->block_size - (local->offset % local->block_size)) + + ((block_num - local->first_block - 1) * local->block_size); + } + + for (i = 0; i < count; i++) { + address = (char *)address + off; + memcpy(address, vector[i].iov_base, vector[i].iov_len); + off += vector[i].iov_len; + } + +out: + if (anon_fd) + fd_unref(anon_fd); + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + } else { + if (xdata) + local->xattr_rsp = dict_ref(xdata); + vec.iov_base = local->iobuf->ptr; + if (local->offset + local->req_size > local->prebuf.ia_size) + local->total_size = local->prebuf.ia_size - local->offset; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, + &vec, 1, &local->prebuf, local->iobref, + local->xattr_rsp); + return 0; + } + } + + return 0; +} + +int +shard_readv_do(call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int call_count = 0; + int last_block = 0; + int cur_block = 0; + off_t orig_offset = 0; + off_t shard_offset = 0; + size_t read_size = 0; + size_t remaining_size = 0; + fd_t *fd = NULL; + fd_t *anon_fd = NULL; + shard_local_t *local = NULL; + gf_boolean_t wind_failed = _gf_false; + + local = frame->local; + fd = local->fd; + + orig_offset = local->offset; + cur_block = local->first_block; + last_block = local->last_block; + remaining_size = local->total_size; + local->call_count = call_count = local->num_blocks; + + SHARD_SET_ROOT_FS_ID(frame, local); + + if (fd->flags & O_DIRECT) + local->flags = O_DIRECT; + + while (cur_block <= last_block) { + if (wind_failed) { + shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, + 0, NULL, NULL, NULL); + goto next; + } - local = frame->local; + shard_offset = orig_offset % local->block_size; + read_size = local->block_size - shard_offset; + if (read_size > remaining_size) + read_size = remaining_size; + + remaining_size -= read_size; + + if (cur_block == 0) { + anon_fd = fd_ref(fd); + } else { + anon_fd = fd_anonymous(local->inode_list[i]); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, 0, NULL, NULL, NULL); + goto next; + } + } - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) - goto err; + STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, anon_fd, read_size, + shard_offset, local->flags, local->xattr_req); - STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); - dict_unref(xattr_req); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, op_errno); - return 0; + orig_offset += read_size; + next: + cur_block++; + i++; + call_count--; + } + return 0; } -int shard_create_marker_file_under_remove_me_cbk( - call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) { - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - local = frame->local; - priv = this->private; - - SHARD_UNSET_ROOT_FS_ID(frame, local); - if (op_ret < 0) { - if ((op_errno != EEXIST) && (op_errno != ENODATA)) { - local->op_ret = op_ret; - local->op_errno = op_errno; - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Marker file creation " - "failed while performing %s; entry gfid=%s", - gf_fop_string(local->fop), local->newloc.name); - goto err; - } else { - shard_lookup_marker_file(frame, this); - return 0; +int +shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int shard_block_num = (long)cookie; + int call_count = 0; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + if (op_errno == EEXIST) { + LOCK(&frame->lock); + { + local->eexist_count++; + } + UNLOCK(&frame->lock); + } else { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + gf_msg_debug(this->name, 0, + "mknod of shard %d " + "failed: %s", + shard_block_num, strerror(op_errno)); + goto done; } - } - linked_inode = - inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); - inode_unref(local->newloc.inode); - local->newloc.inode = linked_inode; + shard_link_block_inode(local, shard_block_num, inode, buf); - if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file(frame, this); - else if (local->fop == GF_FOP_RENAME) - shard_rename_src_base_file(frame, this); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); - return 0; -} - -int shard_create_marker_file_under_remove_me(call_frame_t *frame, - xlator_t *this, loc_t *loc) { - int ret = 0; - int op_errno = ENOMEM; - uint64_t bs = 0; - char g1[64] = { - 0, - }; - char g2[64] = { - 0, - }; - dict_t *xattr_req = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - SHARD_SET_ROOT_FS_ID(frame, local); - - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) - goto err; - - local->newloc.inode = inode_new(this->itable); - local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); - ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), - (char **)&local->newloc.path); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on " - "pargfid=%s bname=%s", - uuid_utoa_r(priv->dot_shard_rm_gfid, g1), - uuid_utoa_r(loc->inode->gfid, g2)); - goto err; - } - local->newloc.name = strrchr(local->newloc.path, '/'); - if (local->newloc.name) - local->newloc.name++; - - if (local->fop == GF_FOP_UNLINK) - bs = local->block_size; - else if (local->fop == GF_FOP_RENAME) - bs = local->dst_block_size; - - SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, - local->prebuf.ia_size, 0, err); - - STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, - 0, 0, 0644, xattr_req); - dict_unref(xattr_req); - return 0; +done: + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + local->create_count = 0; + local->post_mknod_handler(frame, this); + } -err: - if (xattr_req) - dict_unref(xattr_req); - shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, - NULL, NULL, NULL, NULL, NULL); - return 0; + return 0; } -int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); - -int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) { - int ret = 0; - shard_local_t *local = NULL; +int +shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, + shard_post_mknod_fop_handler_t post_mknod_handler) +{ + int i = 0; + int shard_idx_iter = 0; + int last_block = 0; + int ret = 0; + int call_count = 0; + char path[PATH_MAX] = { + 0, + }; + mode_t mode = 0; + char *bname = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t ctx_tmp = { + 0, + }; + shard_local_t *local = NULL; + gf_boolean_t wind_failed = _gf_false; + fd_t *fd = NULL; + loc_t loc = { + 0, + }; + dict_t *xattr_req = NULL; - local = frame->local; + local = frame->local; + priv = this->private; + fd = local->fd; + shard_idx_iter = local->first_block; + last_block = local->last_block; + call_count = local->call_count = local->create_count; + local->post_mknod_handler = post_mknod_handler; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } else { - shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); - local->preoldparent = *preparent; - local->postoldparent = *postparent; - if (xdata) - local->xattr_rsp = dict_ref(xdata); - if (local->cleanup_required) - shard_start_background_deletion(this); - } + SHARD_SET_ROOT_FS_ID(frame, local); - if (local->entrylk_frame) { - ret = shard_unlock_entrylk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; + ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get inode " + "ctx for %s", + uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; } - } + mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); - ret = shard_unlock_inodelk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - } - - shard_unlink_cbk(frame, this); - return 0; -} - -int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = frame->local; - - /* To-Do: Request open-fd count on base file */ - STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, - local->xattr_req); - return 0; -} - -int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) { - if (op_ret) - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Unlock failed. Please check brick logs for " - "more details"); - SHARD_STACK_DESTROY(frame); - return 0; -} - -int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { - loc_t *loc = NULL; - call_frame_t *lk_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *lk_local = NULL; - shard_entrylk_t *lock = NULL; - - local = frame->local; - lk_frame = local->entrylk_frame; - lk_local = lk_frame->local; - local->entrylk_frame = NULL; - lock = &lk_local->int_entrylk; - loc = &lock->loc; - - STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, this->name, loc, - lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, - NULL); - local->int_entrylk.acquired_lock = _gf_false; - return 0; -} - -int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; - - switch (local->fop) { - case GF_FOP_UNLINK: - case GF_FOP_RENAME: - shard_create_marker_file_under_remove_me(frame, this, - &local->int_inodelk.loc); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "post-entrylk handler not defined. This case should not" - " be hit"); - break; - } - return 0; -} - -int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) { - call_frame_t *main_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *main_local = NULL; - - local = frame->local; - main_frame = local->main_frame; - main_local = main_frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); - return 0; - } - main_local->int_entrylk.acquired_lock = _gf_true; - shard_post_entrylk_fop_handler(main_frame, this); - return 0; -} - -int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, - uuid_t gfid) { - char gfid_str[GF_UUID_BUF_SIZE] = { - 0, - }; - shard_local_t *local = NULL; - shard_local_t *entrylk_local = NULL; - shard_entrylk_t *int_entrylk = NULL; - call_frame_t *entrylk_frame = NULL; - - local = frame->local; - entrylk_frame = create_frame(this, this->ctx->pool); - if (!entrylk_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create new frame " - "to lock marker file"); - goto err; - } - - entrylk_local = mem_get0(this->local_pool); - if (!entrylk_local) { - STACK_DESTROY(entrylk_frame->root); - goto err; - } - - entrylk_frame->local = entrylk_local; - entrylk_local->main_frame = frame; - int_entrylk = &entrylk_local->int_entrylk; - - int_entrylk->loc.inode = inode_ref(inode); - set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); - local->entrylk_frame = entrylk_frame; - gf_uuid_unparse(gfid, gfid_str); - int_entrylk->basename = gf_strdup(gfid_str); - - STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, - int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; -} + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { + shard_idx_iter++; + i++; + continue; + } -int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; + if (wind_failed) { + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; + } - priv = this->private; - local = frame->local; + shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, + sizeof(path)); + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; + } + + bname = strrchr(path, '/') + 1; + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + "on %s, base file gfid = %s", + bname, uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + loc_wipe(&loc); + dict_unref(xattr_req); + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, + (void *)(long)shard_idx_iter, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, &loc, mode, + ctx_tmp.stat.ia_rdev, 0, xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); + + next: + shard_idx_iter++; + i++; + if (!--call_count) + break; + } - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); return 0; - } - - if (local->prebuf.ia_nlink > 1) { - gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " - "performing rename()/unlink()", - local->int_inodelk.loc.path, local->prebuf.ia_nlink); - if (local->fop == GF_FOP_RENAME) - shard_rename_src_base_file(frame, this); - else if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file(frame, this); - } else { - gf_msg_debug(this->name, 0, "link count on %s = 1, creating " - "file under .remove_me", - local->int_inodelk.loc.path); - local->cleanup_required = _gf_true; - shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, - local->prebuf.ia_gfid); - } - return 0; -} - -int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; - - switch (local->fop) { - case GF_FOP_UNLINK: - case GF_FOP_RENAME: - shard_lookup_base_file(frame, this, &local->int_inodelk.loc, - shard_post_lookup_base_shard_rm_handler); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "post-inodelk handler not defined. This case should not" - " be hit"); - break; - } - return 0; -} - -int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) { - call_frame_t *main_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *main_local = NULL; - - local = frame->local; - main_frame = local->main_frame; - main_local = main_frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); - return 0; - } - main_local->int_inodelk.acquired_lock = _gf_true; - shard_post_inodelk_fop_handler(main_frame, this); - return 0; -} - -int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { - call_frame_t *lk_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *lk_local = NULL; - shard_inodelk_t *int_inodelk = NULL; - - local = frame->local; - lk_frame = create_frame(this, this->ctx->pool); - if (!lk_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create new frame " - "to lock base shard"); - goto err; - } - lk_local = mem_get0(this->local_pool); - if (!lk_local) { - STACK_DESTROY(lk_frame->root); - goto err; - } - - lk_frame->local = lk_local; - lk_local->main_frame = frame; - int_inodelk = &lk_local->int_inodelk; - - int_inodelk->flock.l_len = 0; - int_inodelk->flock.l_start = 0; - int_inodelk->domain = this->name; - int_inodelk->flock.l_type = F_WRLCK; - loc_copy(&local->int_inodelk.loc, loc); - set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); - local->inodelk_frame = lk_frame; - - STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, - &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); - return 0; err: - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; + /* + * This block is for handling failure in shard_inode_ctx_get_all(). + * Failures in the while-loop are handled within the loop. + */ + SHARD_UNSET_ROOT_FS_ID(frame, local); + post_mknod_handler(frame, this); + return 0; } -int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { - loc_t *loc = NULL; - shard_local_t *local = NULL; +int +shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); - local = frame->local; +int +shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); - return 0; - } - if (local->fop == GF_FOP_UNLINK) - loc = &local->loc; - else if (local->fop == GF_FOP_RENAME) - loc = &local->loc2; - shard_acquire_inodelk(frame, this, loc); - return 0; -} + local = frame->local; -int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t handler, - shard_internal_dir_type_t type); -int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - local = frame->local; + if (local->create_count) { + shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); + } else { + shard_readv_do(frame, this); + } - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); return 0; - } - shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, - SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); - return 0; } -void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; +int +shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - priv = this->private; - local = frame->local; + local = frame->local; - local->dot_shard_rm_loc.inode = - inode_find(this->itable, priv->dot_shard_rm_gfid); - if (!local->dot_shard_rm_loc.inode) { - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_pre_mkdir_rm_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } - } else { - local->post_res_handler = shard_post_mkdir_rm_handler; - shard_refresh_internal_dir(frame, this, - SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); - } -} - -int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; - } - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - - loc_copy(&local->loc, loc); - local->xflag = xflag; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - local->block_size = block_size; - local->resolver_base_inode = loc->inode; - local->fop = GF_FOP_UNLINK; - if (!this->itable) - this->itable = (local->loc.inode)->table; - - local->resolve_not = _gf_true; - shard_begin_rm_resolution(frame, this); - return 0; -err: - shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); - return 0; -} + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } -int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { - shard_rename_cbk(frame, this); - return 0; + if (!local->eexist_count) { + shard_readv_do(frame, this); + } else { + local->call_count = local->eexist_count; + shard_common_lookup_shards(frame, this, local->loc.inode, + shard_post_lookup_shards_readv_handler); + } + return 0; } -int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) { - int ret = 0; - shard_local_t *local = NULL; - - local = frame->local; +int +shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } - /* Set ctx->refresh to TRUE to force a lookup on disk when - * shard_lookup_base_file() is called next to refresh the hard link - * count in ctx. Note that this is applicable only to the case where - * the rename dst is already existent and sharded. - */ - if ((local->dst_block_size) && (!local->cleanup_required)) - shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); - - local->prebuf = *buf; - local->preoldparent = *preoldparent; - local->postoldparent = *postoldparent; - local->prenewparent = *prenewparent; - local->postnewparent = *postnewparent; - if (xdata) - local->xattr_rsp = dict_ref(xdata); + local = frame->local; - if (local->dst_block_size) { - if (local->entrylk_frame) { - ret = shard_unlock_entrylk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - } + if (local->op_ret < 0) { + if (local->op_errno != ENOENT) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } else { + struct iovec vec = { + 0, + }; + + vec.iov_base = local->iobuf->ptr; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, + &local->prebuf, local->iobref, NULL); + return 0; + } } - ret = shard_unlock_inodelk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - goto err; - } - if (local->cleanup_required) - shard_start_background_deletion(this); - } - - /* Now the base file of src, if sharded, is looked up to gather ia_size - * and ia_blocks.*/ - if (local->block_size) { - local->tmp_loc.inode = inode_new(this->itable); - gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file(frame, this, &local->tmp_loc, - shard_post_rename_lookup_handler); - } else { - shard_rename_cbk(frame, this); - } - return 0; -err: - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; -} - -int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; + if (local->call_count) { + shard_common_lookup_shards(frame, this, local->resolver_base_inode, + shard_post_lookup_shards_readv_handler); + } else { + shard_readv_do(frame, this); + } - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); return 0; - } - - /* Save dst base file attributes into postbuf so the information is not - * lost when it is overwritten after lookup on base file of src in - * shard_lookup_base_file_cbk(). - */ - local->postbuf = local->prebuf; - shard_rename_src_base_file(frame, this); - return 0; -} - -int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - uint64_t dst_block_size = 0; - shard_local_t *local = NULL; - - if (IA_ISDIR(oldloc->inode->ia_type)) { - STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; - } - - ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); - if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(oldloc->inode->gfid)); - goto err; - } - - if (newloc->inode) - ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); - - /* The following stack_wind covers the case where: - * a. the src file is not sharded and dst doesn't exist, OR - * b. the src and dst both exist but are not sharded. - */ - if (((!block_size) && (!dst_block_size)) || - frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; - } - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); - local->resolver_base_inode = newloc->inode; - local->fop = GF_FOP_RENAME; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - - local->block_size = block_size; - local->dst_block_size = dst_block_size; - if (!this->itable) - this->itable = (local->loc.inode)->table; - local->resolve_not = _gf_true; - - /* The following if-block covers the case where the dst file exists - * and is sharded. - */ - if (local->dst_block_size) { - shard_begin_rm_resolution(frame, this); - } else { - /* The following block covers the case where the dst either doesn't - * exist or is NOT sharded but the src is sharded. In this case, shard - * xlator would go ahead and rename src to dst. Once done, it would also - * lookup the base shard of src to get the ia_size and ia_blocks xattr - * values. - */ - shard_rename_src_base_file(frame, this); - } - return 0; - -err: - shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); - return 0; } -int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) { - int ret = -1; - shard_local_t *local = NULL; +int +shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + struct iobuf *iobuf = NULL; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; - local = frame->local; + priv = this->private; + local = frame->local; - if (op_ret == -1) - goto unwind; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, - SHARD_ALL_MASK); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, - "Failed to set inode " - "ctx for %s", - uuid_utoa(inode->gfid)); + if (local->offset >= local->prebuf.ia_size) { + /* If the read is being performed past the end of the file, + * unwind the FOP with 0 bytes read as status. + */ + struct iovec vec = { + 0, + }; -unwind: - SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, - preparent, postparent, xdata); - return 0; -} + iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); + if (!iobuf) + goto err; -int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; + vec.iov_base = iobuf->ptr; + vec.iov_len = 0; + local->iobref = iobref_new(); + iobref_add(local->iobref, iobuf); + iobuf_unref(iobuf); - priv = this->private; - local = mem_get0(this->local_pool); - if (!local) - goto err; + SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, + local->iobref, NULL); + return 0; + } - frame->local = local; - local->block_size = priv->block_size; + local->first_block = get_lowest_block(local->offset, local->block_size); - if (!__is_gsyncd_on_shard_dir(frame, loc)) { - SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); - } + local->total_size = local->req_size; - STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, - xdata); - return 0; -err: - shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); - return 0; -} - -int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - /* To-Do: Handle open with O_TRUNC under locks */ - SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); - return 0; -} - -int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) { - STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; -} - -int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) { - int i = 0; - int call_count = 0; - void *address = NULL; - uint64_t block_num = 0; - off_t off = 0; - struct iovec vec = { - 0, - }; - shard_local_t *local = NULL; - fd_t *anon_fd = cookie; - shard_inode_ctx_t *ctx = NULL; - - local = frame->local; - - /* If shard has already seen a failure here before, there is no point - * in aggregating subsequent reads, so just go to out. - */ - if (local->op_ret < 0) - goto out; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto out; - } + local->last_block = get_highest_block(local->offset, local->total_size, + local->block_size); - if (local->op_ret >= 0) - local->op_ret += op_ret; + local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); + local->resolver_base_inode = local->loc.inode; - shard_inode_ctx_get(anon_fd->inode, this, &ctx); - block_num = ctx->block_num; - - if (block_num == local->first_block) { - address = local->iobuf->ptr; - } else { - /* else - * address to start writing to = beginning of buffer + - * number of bytes until end of first block + - * + block_size times number of blocks - * between the current block and the first - */ - address = (char *)local->iobuf->ptr + - (local->block_size - (local->offset % local->block_size)) + - ((block_num - local->first_block - 1) * local->block_size); - } + local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) + goto err; - for (i = 0; i < count; i++) { - address = (char *)address + off; - memcpy(address, vector[i].iov_base, vector[i].iov_len); - off += vector[i].iov_len; - } + iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); + if (!iobuf) + goto err; -out: - if (anon_fd) - fd_unref(anon_fd); - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - } else { - if (xdata) - local->xattr_rsp = dict_ref(xdata); - vec.iov_base = local->iobuf->ptr; - if (local->offset + local->req_size > local->prebuf.ia_size) - local->total_size = local->prebuf.ia_size - local->offset; - vec.iov_len = local->total_size; - local->op_ret = local->total_size; - SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, &vec, 1, - &local->prebuf, local->iobref, local->xattr_rsp); - return 0; - } - } - - return 0; -} - -int shard_readv_do(call_frame_t *frame, xlator_t *this) { - int i = 0; - int call_count = 0; - int last_block = 0; - int cur_block = 0; - off_t orig_offset = 0; - off_t shard_offset = 0; - size_t read_size = 0; - size_t remaining_size = 0; - fd_t *fd = NULL; - fd_t *anon_fd = NULL; - shard_local_t *local = NULL; - gf_boolean_t wind_failed = _gf_false; - - local = frame->local; - fd = local->fd; - - orig_offset = local->offset; - cur_block = local->first_block; - last_block = local->last_block; - remaining_size = local->total_size; - local->call_count = call_count = local->num_blocks; - - SHARD_SET_ROOT_FS_ID(frame, local); - - if (fd->flags & O_DIRECT) - local->flags = O_DIRECT; - - while (cur_block <= last_block) { - if (wind_failed) { - shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, - NULL, NULL, NULL); - goto next; - } - - shard_offset = orig_offset % local->block_size; - read_size = local->block_size - shard_offset; - if (read_size > remaining_size) - read_size = remaining_size; - - remaining_size -= read_size; - - if (cur_block == 0) { - anon_fd = fd_ref(fd); - } else { - anon_fd = fd_anonymous(local->inode_list[i]); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, - 0, NULL, NULL, NULL); - goto next; - } + local->iobref = iobref_new(); + if (!local->iobref) { + iobuf_unref(iobuf); + goto err; } - STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, anon_fd, read_size, - shard_offset, local->flags, local->xattr_req); + if (iobref_add(local->iobref, iobuf) != 0) { + iobuf_unref(iobuf); + goto err; + } - orig_offset += read_size; - next: - cur_block++; - i++; - call_count--; - } - return 0; -} + memset(iobuf->ptr, 0, local->total_size); + iobuf_unref(iobuf); + local->iobuf = iobuf; -int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) { - int shard_block_num = (long)cookie; - int call_count = 0; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret < 0) { - if (op_errno == EEXIST) { - LOCK(&frame->lock); - { local->eexist_count++; } - UNLOCK(&frame->lock); + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + ret = shard_init_internal_dir_loc(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; + shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); } else { - local->op_ret = op_ret; - local->op_errno = op_errno; + local->post_res_handler = shard_post_resolve_readv_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); } - gf_msg_debug(this->name, 0, "mknod of shard %d " - "failed: %s", - shard_block_num, strerror(op_errno)); - goto done; - } - - shard_link_block_inode(local, shard_block_num, inode, buf); + return 0; +err: + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + return 0; +} -done: - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - local->create_count = 0; - local->post_mknod_handler(frame, this); - } - - return 0; -} - -int shard_common_resume_mknod( - call_frame_t *frame, xlator_t *this, - shard_post_mknod_fop_handler_t post_mknod_handler) { - int i = 0; - int shard_idx_iter = 0; - int last_block = 0; - int ret = 0; - int call_count = 0; - char path[PATH_MAX] = { - 0, - }; - mode_t mode = 0; - char *bname = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t ctx_tmp = { - 0, - }; - shard_local_t *local = NULL; - gf_boolean_t wind_failed = _gf_false; - fd_t *fd = NULL; - loc_t loc = { - 0, - }; - dict_t *xattr_req = NULL; - - local = frame->local; - priv = this->private; - fd = local->fd; - shard_idx_iter = local->first_block; - last_block = local->last_block; - call_count = local->call_count = local->create_count; - local->post_mknod_handler = post_mknod_handler; - - SHARD_SET_ROOT_FS_ID(frame, local); - - ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get inode " - "ctx for %s", - uuid_utoa(fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); +int +shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int ret = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; - while (shard_idx_iter <= last_block) { - if (local->inode_list[i]) { - shard_idx_iter++; - i++; - continue; + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto err; } - if (wind_failed) { - shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, - ENOMEM, NULL, NULL, NULL, NULL, NULL); - goto next; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + /* block_size = 0 means that the file was created before + * sharding was enabled on the volume. + */ + STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; } - shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, - sizeof(path)); - - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, - ENOMEM, NULL, NULL, NULL, NULL, NULL); - goto next; - } + if (!this->itable) + this->itable = fd->inode->table; - bname = strrchr(path, '/') + 1; - loc.inode = inode_new(this->itable); - loc.parent = inode_ref(priv->dot_shard_inode); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0 || !(loc.inode)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - "on %s, base file gfid = %s", - bname, uuid_utoa(fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - loc_wipe(&loc); - dict_unref(xattr_req); - shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, - ENOMEM, NULL, NULL, NULL, NULL, NULL); - goto next; - } + local = mem_get0(this->local_pool); + if (!local) + goto err; - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; + frame->local = local; - STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, - (void *)(long)shard_idx_iter, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, &loc, mode, - ctx_tmp.stat.ia_rdev, 0, xattr_req); - loc_wipe(&loc); - dict_unref(xattr_req); + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + local->fd = fd_ref(fd); + local->block_size = block_size; + local->offset = offset; + local->req_size = size; + local->flags = flags; + local->fop = GF_FOP_READ; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - next: - shard_idx_iter++; - i++; - if (!--call_count) - break; - } + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - return 0; + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_readv_handler); + return 0; err: - /* - * This block is for handling failure in shard_inode_ctx_get_all(). - * Failures in the while-loop are handled within the loop. - */ - SHARD_UNSET_ROOT_FS_ID(frame, local); - post_mknod_handler(frame, this); - return 0; + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + return 0; } -int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); - -int shard_post_lookup_shards_readv_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; +int +shard_common_inode_write_post_update_size_handler(call_frame_t *frame, + xlator_t *this) +{ + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + } else { + shard_common_inode_write_success_unwind(local->fop, frame, + local->written_size); + } return 0; - } - - if (local->create_count) { - shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); - } else { - shard_readv_do(frame, this); - } - - return 0; } -int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; +static gf_boolean_t +shard_is_appending_write(shard_local_t *local) +{ + if (local->fop != GF_FOP_WRITE) + return _gf_false; + if (local->flags & O_APPEND) + return _gf_true; + if (local->fd->flags & O_APPEND) + return _gf_true; + return _gf_false; +} - local = frame->local; +int +__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - if (!local->eexist_count) { - shard_readv_do(frame, this); - } else { - local->call_count = local->eexist_count; - shard_common_lookup_shards(frame, this, local->loc.inode, - shard_post_lookup_shards_readv_handler); - } - return 0; -} + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; -int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; + if (shard_is_appending_write(local)) { + local->delta_size = local->total_size; + } else if (local->offset + local->total_size > ctx->stat.ia_size) { + local->delta_size = (local->offset + local->total_size) - + ctx->stat.ia_size; + } else { + local->delta_size = 0; + } + ctx->stat.ia_size += (local->delta_size); + local->postbuf = ctx->stat; - local = frame->local; + return 0; +} - if (local->op_ret < 0) { - if (local->op_errno != ENOENT) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; - } else { - struct iovec vec = { - 0, - }; +int +shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = -1; - vec.iov_base = local->iobuf->ptr; - vec.iov_len = local->total_size; - local->op_ret = local->total_size; - SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, - &local->prebuf, local->iobref, NULL); - return 0; + LOCK(&inode->lock); + { + ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } - } + UNLOCK(&inode->lock); - if (local->call_count) { - shard_common_lookup_shards(frame, this, local->resolver_base_inode, - shard_post_lookup_shards_readv_handler); - } else { - shard_readv_do(frame, this); - } - - return 0; + return ret; } -int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { - int ret = 0; - struct iobuf *iobuf = NULL; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; +int +shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + int call_count = 0; + fd_t *anon_fd = cookie; + shard_local_t *local = NULL; + glusterfs_fop_t fop = 0; - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; - } + local = frame->local; + fop = local->fop; - if (local->offset >= local->prebuf.ia_size) { - /* If the read is being performed past the end of the file, - * unwind the FOP with 0 bytes read as status. - */ - struct iovec vec = { - 0, - }; + LOCK(&frame->lock); + { + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + local->written_size += op_ret; + GF_ATOMIC_ADD(local->delta_blocks, + post->ia_blocks - pre->ia_blocks); + local->delta_size += (post->ia_size - pre->ia_size); + shard_inode_ctx_set(local->fd->inode, this, post, 0, + SHARD_MASK_TIMES); + if (local->fd->inode != anon_fd->inode) + shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, + anon_fd->inode); + } + } + UNLOCK(&frame->lock); - iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); - if (!iobuf) - goto err; + if (anon_fd) + fd_unref(anon_fd); - vec.iov_base = iobuf->ptr; - vec.iov_len = 0; - local->iobref = iobref_new(); - iobref_add(local->iobref, iobuf); - iobuf_unref(iobuf); + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { + shard_common_failure_unwind(fop, frame, local->op_ret, + local->op_errno); + } else { + shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); + local->hole_size = 0; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + shard_update_file_size( + frame, this, local->fd, NULL, + shard_common_inode_write_post_update_size_handler); + } + } - SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, - local->iobref, NULL); return 0; - } +} - local->first_block = get_lowest_block(local->offset, local->block_size); +int +shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vec, int count, off_t shard_offset, + size_t size) +{ + shard_local_t *local = NULL; - local->total_size = local->req_size; + local = frame->local; - local->last_block = - get_highest_block(local->offset, local->total_size, local->block_size); + switch (local->fop) { + case GF_FOP_WRITE: + STACK_WIND_COOKIE( + frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, + local->flags, local->iobref, local->xattr_req); + break; + case GF_FOP_FALLOCATE: + STACK_WIND_COOKIE( + frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, local->flags, + shard_offset, size, local->xattr_req); + break; + case GF_FOP_ZEROFILL: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, + shard_offset, size, local->xattr_req); + break; + case GF_FOP_DISCARD: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, + shard_offset, size, local->xattr_req); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", local->fop); + break; + } + return 0; +} - local->num_blocks = local->last_block - local->first_block + 1; - GF_ASSERT(local->num_blocks > 0); - local->resolver_base_inode = local->loc.inode; +int +shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) +{ + int i = 0; + int count = 0; + int call_count = 0; + int last_block = 0; + uint32_t cur_block = 0; + fd_t *fd = NULL; + fd_t *anon_fd = NULL; + shard_local_t *local = NULL; + struct iovec *vec = NULL; + gf_boolean_t wind_failed = _gf_false; + gf_boolean_t odirect = _gf_false; + off_t orig_offset = 0; + off_t shard_offset = 0; + off_t vec_offset = 0; + size_t remaining_size = 0; + size_t shard_write_size = 0; - local->inode_list = - GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); - if (!local->inode_list) - goto err; + local = frame->local; + fd = local->fd; + + orig_offset = local->offset; + remaining_size = local->total_size; + cur_block = local->first_block; + local->call_count = call_count = local->num_blocks; + last_block = local->last_block; + + SHARD_SET_ROOT_FS_ID(frame, local); + + if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC + " into " + "dict: %s", + uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + local->call_count = 1; + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, + ENOMEM, NULL, NULL, NULL); + return 0; + } - iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); - if (!iobuf) - goto err; + if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) + odirect = _gf_true; - local->iobref = iobref_new(); - if (!local->iobref) { - iobuf_unref(iobuf); - goto err; - } + while (cur_block <= last_block) { + if (wind_failed) { + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } - if (iobref_add(local->iobref, iobuf) != 0) { - iobuf_unref(iobuf); - goto err; - } + shard_offset = orig_offset % local->block_size; + shard_write_size = local->block_size - shard_offset; + if (shard_write_size > remaining_size) + shard_write_size = remaining_size; + + remaining_size -= shard_write_size; + + if (local->fop == GF_FOP_WRITE) { + count = iov_subset(local->vector, local->count, vec_offset, + vec_offset + shard_write_size, NULL); + + vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); + if (!vec) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + GF_FREE(vec); + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, + -1, ENOMEM, NULL, NULL, NULL); + goto next; + } + count = iov_subset(local->vector, local->count, vec_offset, + vec_offset + shard_write_size, vec); + } - memset(iobuf->ptr, 0, local->total_size); - iobuf_unref(iobuf); - local->iobuf = iobuf; + if (cur_block == 0) { + anon_fd = fd_ref(fd); + } else { + anon_fd = fd_anonymous(local->inode_list[i]); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + GF_FREE(vec); + shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, + this, -1, ENOMEM, NULL, NULL, + NULL); + goto next; + } + + if (local->fop == GF_FOP_WRITE) { + if (odirect) + local->flags = O_DIRECT; + else + local->flags = GF_ANON_FD_FLAGS; + } + } - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = - shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret) - goto err; - shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_post_resolve_readv_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } - return 0; -err: - shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); - return 0; -} - -int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) { - int ret = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa(fd->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - /* block_size = 0 means that the file was created before - * sharding was enabled on the volume. - */ - STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); - return 0; - } - - if (!this->itable) - this->itable = fd->inode->table; - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - - ret = syncbarrier_init(&local->barrier); - if (ret) - goto err; - local->fd = fd_ref(fd); - local->block_size = block_size; - local->offset = offset; - local->req_size = size; - local->flags = flags; - local->fop = GF_FOP_READ; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_readv_handler); - return 0; -err: - shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); - return 0; + shard_common_inode_write_wind(frame, this, anon_fd, vec, count, + shard_offset, shard_write_size); + if (vec) + vec_offset += shard_write_size; + orig_offset += shard_write_size; + GF_FREE(vec); + vec = NULL; + next: + cur_block++; + i++; + call_count--; + } + return 0; } -int shard_common_inode_write_post_update_size_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; +int +shard_common_inode_write_post_mknod_handler(call_frame_t *frame, + xlator_t *this); - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - } else { - shard_common_inode_write_success_unwind(local->fop, frame, - local->written_size); - } - return 0; -} +int +shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this) +{ + shard_local_t *local = NULL; -static gf_boolean_t shard_is_appending_write(shard_local_t *local) { - if (local->fop != GF_FOP_WRITE) - return _gf_false; - if (local->flags & O_APPEND) - return _gf_true; - if (local->fd->flags & O_APPEND) - return _gf_true; - return _gf_false; -} + local = frame->local; -int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + if (local->create_count) { + shard_common_resume_mknod(frame, this, + shard_common_inode_write_post_mknod_handler); + } else { + shard_common_inode_write_do(frame, this); + } - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + return 0; +} - if (shard_is_appending_write(local)) { - local->delta_size = local->total_size; - } else if (local->offset + local->total_size > ctx->stat.ia_size) { - local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; - } else { - local->delta_size = 0; - } - ctx->stat.ia_size += (local->delta_size); - local->postbuf = ctx->stat; +int +shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - return 0; -} + local = frame->local; -int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) { - int ret = -1; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - LOCK(&inode->lock); - { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } - UNLOCK(&inode->lock); + if (!local->eexist_count) { + shard_common_inode_write_do(frame, this); + } else { + local->call_count = local->eexist_count; + shard_common_lookup_shards( + frame, this, local->loc.inode, + shard_common_inode_write_post_lookup_shards_handler); + } - return ret; + return 0; } -int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) { - int call_count = 0; - fd_t *anon_fd = cookie; - shard_local_t *local = NULL; - glusterfs_fop_t fop = 0; +int +shard_common_inode_write_post_resolve_handler(call_frame_t *frame, + xlator_t *this) +{ + shard_local_t *local = NULL; - local = frame->local; - fop = local->fop; + local = frame->local; - LOCK(&frame->lock); - { - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } else { - local->written_size += op_ret; - GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks); - local->delta_size += (post->ia_size - pre->ia_size); - shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); - if (local->fd->inode != anon_fd->inode) - shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, - anon_fd->inode); - } - } - UNLOCK(&frame->lock); - - if (anon_fd) - fd_unref(anon_fd); - - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); if (local->op_ret < 0) { - shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno); + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + if (local->call_count) { + shard_common_lookup_shards( + frame, this, local->resolver_base_inode, + shard_common_inode_write_post_lookup_shards_handler); } else { - shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); - local->hole_size = 0; - if (xdata) - local->xattr_rsp = dict_ref(xdata); - shard_update_file_size(frame, this, local->fd, NULL, - shard_common_inode_write_post_update_size_handler); + shard_common_inode_write_do(frame, this); } - } - return 0; + return 0; } -int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vec, int count, - off_t shard_offset, size_t size) { - shard_local_t *local = NULL; +int +shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + xlator_t *this) +{ + shard_local_t *local = frame->local; + shard_priv_t *priv = this->private; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - local = frame->local; + local->postbuf = local->prebuf; + + /*Adjust offset to EOF so that correct shard is chosen for append*/ + if (shard_is_appending_write(local)) + local->offset = local->prebuf.ia_size; + + local->first_block = get_lowest_block(local->offset, local->block_size); + local->last_block = get_highest_block(local->offset, local->total_size, + local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; + GF_ASSERT(local->num_blocks > 0); + local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), + gf_shard_mt_inode_list); + if (!local->inode_list) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } - switch (local->fop) { - case GF_FOP_WRITE: - STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, - vec, count, shard_offset, local->flags, local->iobref, - local->xattr_req); - break; - case GF_FOP_FALLOCATE: - STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, - local->flags, shard_offset, size, local->xattr_req); - break; - case GF_FOP_ZEROFILL: - STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, - shard_offset, size, local->xattr_req); - break; - case GF_FOP_DISCARD: - STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, - shard_offset, size, local->xattr_req); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", local->fop); - break; - } - return 0; -} - -int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) { - int i = 0; - int count = 0; - int call_count = 0; - int last_block = 0; - uint32_t cur_block = 0; - fd_t *fd = NULL; - fd_t *anon_fd = NULL; - shard_local_t *local = NULL; - struct iovec *vec = NULL; - gf_boolean_t wind_failed = _gf_false; - gf_boolean_t odirect = _gf_false; - off_t orig_offset = 0; - off_t shard_offset = 0; - off_t vec_offset = 0; - size_t remaining_size = 0; - size_t shard_write_size = 0; - - local = frame->local; - fd = local->fd; - - orig_offset = local->offset; - remaining_size = local->total_size; - cur_block = local->first_block; - local->call_count = call_count = local->num_blocks; - last_block = local->last_block; - - SHARD_SET_ROOT_FS_ID(frame, local); - - if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into " - "dict: %s", - uuid_utoa(fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - local->call_count = 1; - shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, - NULL, NULL, NULL); - return 0; - } + gf_msg_trace(this->name, 0, + "%s: gfid=%s first_block=%" PRIu64 + " " + "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64 + " total_size=%zu flags=%" PRId32 "", + gf_fop_list[local->fop], + uuid_utoa(local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, + local->offset, local->total_size, local->flags); - if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) - odirect = _gf_true; + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); - while (cur_block <= last_block) { - if (wind_failed) { - shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, - NULL, NULL, NULL); - goto next; + if (!local->dot_shard_loc.inode) { + /*change handler*/ + shard_mkdir_internal_dir(frame, this, + shard_common_inode_write_post_resolve_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + /*change handler*/ + local->post_res_handler = shard_common_inode_write_post_resolve_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); } + return 0; +} - shard_offset = orig_offset % local->block_size; - shard_write_size = local->block_size - shard_offset; - if (shard_write_size > remaining_size) - shard_write_size = remaining_size; +int +shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + inode_t *link_inode = NULL; + shard_local_t *local = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - remaining_size -= shard_write_size; + local = frame->local; - if (local->fop == GF_FOP_WRITE) { - count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, NULL); + SHARD_UNSET_ROOT_FS_ID(frame, local); - vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); - if (!vec) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - GF_FREE(vec); - shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, vec); + if (op_ret == -1) { + if (op_errno != EEXIST) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } else { + gf_msg_debug(this->name, 0, + "mkdir on %s failed " + "with EEXIST. Attempting lookup now", + shard_internal_dir_string(type)); + shard_lookup_internal_dir(frame, this, local->post_res_handler, + type); + return 0; + } } - if (cur_block == 0) { - anon_fd = fd_ref(fd); + link_inode = shard_link_internal_dir_inode(local, inode, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); } else { - anon_fd = fd_anonymous(local->inode_list[i]); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - GF_FREE(vec); - shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - - if (local->fop == GF_FOP_WRITE) { - if (odirect) - local->flags = O_DIRECT; - else - local->flags = GF_ANON_FD_FLAGS; - } - } - - shard_common_inode_write_wind(frame, this, anon_fd, vec, count, - shard_offset, shard_write_size); - if (vec) - vec_offset += shard_write_size; - orig_offset += shard_write_size; - GF_FREE(vec); - vec = NULL; - next: - cur_block++; - i++; - call_count--; - } - return 0; + shard_inode_ctx_mark_dir_refreshed(link_inode, this); + shard_common_resolve_shards(frame, this, local->post_res_handler); + } + return 0; +unwind: + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; } -int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, - xlator_t *this); +int +shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type) +{ + int ret = -1; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xattr_req = NULL; + uuid_t *gfid = NULL; + loc_t *loc = NULL; + gf_boolean_t free_gfid = _gf_true; -int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; + local = frame->local; + priv = this->private; - local = frame->local; + local->post_res_handler = handler; + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid) + goto err; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; + default: + bzero(*gfid, sizeof(uuid_t)); + break; + } - if (local->create_count) { - shard_common_resume_mknod(frame, this, - shard_common_inode_write_post_mknod_handler); - } else { - shard_common_inode_write_do(frame, this); - } + xattr_req = dict_new(); + if (!xattr_req) + goto err; - return 0; -} + ret = shard_init_internal_dir_loc(this, local, type); + if (ret) + goto err; -int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; + ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid-req for %s", + shard_internal_dir_string(type)); + goto err; + } else { + free_gfid = _gf_false; + } - local = frame->local; + SHARD_SET_ROOT_FS_ID(frame, local); - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); + STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, + 0755, 0, xattr_req); + dict_unref(xattr_req); return 0; - } - if (!local->eexist_count) { - shard_common_inode_write_do(frame, this); - } else { - local->call_count = local->eexist_count; - shard_common_lookup_shards( - frame, this, local->loc.inode, - shard_common_inode_write_post_lookup_shards_handler); - } - - return 0; +err: + if (xattr_req) + dict_unref(xattr_req); + local->op_ret = -1; + local->op_errno = ENOMEM; + if (free_gfid) + GF_FREE(gfid); + handler(frame, this); + return 0; } -int shard_common_inode_write_post_resolve_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); +int +shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + /* To-Do: Wind flush on all shards of the file */ + SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); return 0; - } - - if (local->call_count) { - shard_common_lookup_shards( - frame, this, local->resolver_base_inode, - shard_common_inode_write_post_lookup_shards_handler); - } else { - shard_common_inode_write_do(frame, this); - } - - return 0; } -int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, - xlator_t *this) { - shard_local_t *local = frame->local; - shard_priv_t *priv = this->private; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); +int +shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); return 0; - } - - local->postbuf = local->prebuf; +} - /*Adjust offset to EOF so that correct shard is chosen for append*/ - if (shard_is_appending_write(local)) - local->offset = local->prebuf.ia_size; +int +__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - local->first_block = get_lowest_block(local->offset, local->block_size); - local->last_block = - get_highest_block(local->offset, local->total_size, local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; - GF_ASSERT(local->num_blocks > 0); - local->inode_list = - GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); - if (!local->inode_list) { - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - gf_msg_trace( - this->name, 0, "%s: gfid=%s first_block=%" PRIu64 " " - "last_block=%" PRIu64 " num_blocks=%" PRIu64 - " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", - gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), - local->first_block, local->last_block, local->num_blocks, local->offset, - local->total_size, local->flags); + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + local->postbuf.ia_ctime = ctx->stat.ia_ctime; + local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; + local->postbuf.ia_atime = ctx->stat.ia_atime; + local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; + local->postbuf.ia_mtime = ctx->stat.ia_mtime; + local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; - if (!local->dot_shard_loc.inode) { - /*change handler*/ - shard_mkdir_internal_dir(frame, this, - shard_common_inode_write_post_resolve_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - /*change handler*/ - local->post_res_handler = shard_common_inode_write_post_resolve_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } - return 0; + return 0; } -int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) { - inode_t *link_inode = NULL; - shard_local_t *local = NULL; - shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - - local = frame->local; - - SHARD_UNSET_ROOT_FS_ID(frame, local); - - if (op_ret == -1) { - if (op_errno != EEXIST) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } else { - gf_msg_debug(this->name, 0, "mkdir on %s failed " - "with EEXIST. Attempting lookup now", - shard_internal_dir_string(type)); - shard_lookup_internal_dir(frame, this, local->post_res_handler, type); - return 0; - } - } - - link_inode = shard_link_internal_dir_inode(local, inode, buf, type); - if (link_inode != inode) { - shard_refresh_internal_dir(frame, this, type); - } else { - shard_inode_ctx_mark_dir_refreshed(link_inode, this); - shard_common_resolve_shards(frame, this, local->post_res_handler); - } - return 0; -unwind: - shard_common_resolve_shards(frame, this, local->post_res_handler); - return 0; -} - -int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t handler, - shard_internal_dir_type_t type) { - int ret = -1; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - dict_t *xattr_req = NULL; - uuid_t *gfid = NULL; - loc_t *loc = NULL; - gf_boolean_t free_gfid = _gf_true; - - local = frame->local; - priv = this->private; - - local->post_res_handler = handler; - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); - if (!gfid) - goto err; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - gf_uuid_copy(*gfid, priv->dot_shard_gfid); - loc = &local->dot_shard_loc; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); - loc = &local->dot_shard_rm_loc; - break; - default: - bzero(*gfid, sizeof(uuid_t)); - break; - } - - xattr_req = dict_new(); - if (!xattr_req) - goto err; - - ret = shard_init_internal_dir_loc(this, local, type); - if (ret) - goto err; - - ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set gfid-req for %s", shard_internal_dir_string(type)); - goto err; - } else { - free_gfid = _gf_false; - } - - SHARD_SET_ROOT_FS_ID(frame, local); - - STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, - 0755, 0, xattr_req); - dict_unref(xattr_req); - return 0; +int +shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) +{ + int ret = 0; -err: - if (xattr_req) - dict_unref(xattr_req); - local->op_ret = -1; - local->op_errno = ENOMEM; - if (free_gfid) - GF_FREE(gfid); - handler(frame, this); - return 0; -} + LOCK(&inode->lock); + { + ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); + } + UNLOCK(&inode->lock); -int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) { - /* To-Do: Wind flush on all shards of the file */ - SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); - return 0; + return ret; } -int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd, xdata); - return 0; -} +int +shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int call_count = 0; + uint64_t fsync_count = 0; + fd_t *anon_fd = cookie; + shard_local_t *local = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + inode_t *base_inode = NULL; + gf_boolean_t unref_shard_inode = _gf_false; + + local = frame->local; + base_inode = local->fd->inode; -int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) { - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + if (local->op_ret < 0) + goto out; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + LOCK(&frame->lock); + { + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + UNLOCK(&frame->lock); + goto out; + } + shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, + SHARD_MASK_TIMES); + } + UNLOCK(&frame->lock); + fd_ctx_get(anon_fd, this, &fsync_count); +out: + if (anon_fd && (base_inode != anon_fd->inode)) { + LOCK(&base_inode->lock); + LOCK(&anon_fd->inode->lock); + { + __shard_inode_ctx_get(anon_fd->inode, this, &ctx); + __shard_inode_ctx_get(base_inode, this, &base_ictx); + if (op_ret == 0) + ctx->fsync_needed -= fsync_count; + GF_ASSERT(ctx->fsync_needed >= 0); + if (ctx->fsync_needed != 0) { + list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); + base_ictx->fsync_count++; + } else { + unref_shard_inode = _gf_true; + } + } + UNLOCK(&anon_fd->inode->lock); + UNLOCK(&base_inode->lock); + } - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + if (unref_shard_inode) + inode_unref(anon_fd->inode); + if (anon_fd) + fd_unref(anon_fd); - local->postbuf.ia_ctime = ctx->stat.ia_ctime; - local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; - local->postbuf.ia_atime = ctx->stat.ia_atime; - local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; - local->postbuf.ia_mtime = ctx->stat.ia_mtime; - local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + call_count = shard_call_count_return(frame); + if (call_count != 0) + return 0; - return 0; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); + } else { + shard_get_timestamps_from_inode_ctx(local, base_inode, this); + SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } + return 0; } -int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) { - int ret = 0; +int +shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) +{ + int ret = 0; + int call_count = 0; + int fsync_count = 0; + fd_t *anon_fd = NULL; + inode_t *base_inode = NULL; + shard_local_t *local = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *iter = NULL; + struct list_head copy = { + 0, + }; + shard_inode_ctx_t *tmp = NULL; - LOCK(&inode->lock); - { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); } - UNLOCK(&inode->lock); + local = frame->local; + base_inode = local->fd->inode; + local->postbuf = local->prebuf; + INIT_LIST_HEAD(©); - return ret; -} + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); + return 0; + } -int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) { - int call_count = 0; - uint64_t fsync_count = 0; - fd_t *anon_fd = cookie; - shard_local_t *local = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *base_ictx = NULL; - inode_t *base_inode = NULL; - gf_boolean_t unref_shard_inode = _gf_false; - - local = frame->local; - base_inode = local->fd->inode; - - if (local->op_ret < 0) - goto out; - - LOCK(&frame->lock); - { - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - UNLOCK(&frame->lock); - goto out; - } - shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES); - } - UNLOCK(&frame->lock); - fd_ctx_get(anon_fd, this, &fsync_count); -out: - if (anon_fd && (base_inode != anon_fd->inode)) { LOCK(&base_inode->lock); - LOCK(&anon_fd->inode->lock); { - __shard_inode_ctx_get(anon_fd->inode, this, &ctx); - __shard_inode_ctx_get(base_inode, this, &base_ictx); - if (op_ret == 0) - ctx->fsync_needed -= fsync_count; - GF_ASSERT(ctx->fsync_needed >= 0); - if (ctx->fsync_needed != 0) { - list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); - base_ictx->fsync_count++; - } else { - unref_shard_inode = _gf_true; - } - } - UNLOCK(&anon_fd->inode->lock); + __shard_inode_ctx_get(base_inode, this, &ctx); + list_splice_init(&ctx->to_fsync_list, ©); + call_count = ctx->fsync_count; + ctx->fsync_count = 0; + } UNLOCK(&base_inode->lock); - } - - if (unref_shard_inode) - inode_unref(anon_fd->inode); - if (anon_fd) - fd_unref(anon_fd); - - call_count = shard_call_count_return(frame); - if (call_count != 0) - return 0; - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, - local->op_errno); - } else { - shard_get_timestamps_from_inode_ctx(local, base_inode, this); - SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - } - return 0; -} - -int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) { - int ret = 0; - int call_count = 0; - int fsync_count = 0; - fd_t *anon_fd = NULL; - inode_t *base_inode = NULL; - shard_local_t *local = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *iter = NULL; - struct list_head copy = { - 0, - }; - shard_inode_ctx_t *tmp = NULL; - - local = frame->local; - base_inode = local->fd->inode; - local->postbuf = local->prebuf; - INIT_LIST_HEAD(©); - - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, - local->op_errno); - return 0; - } - - LOCK(&base_inode->lock); - { - __shard_inode_ctx_get(base_inode, this, &ctx); - list_splice_init(&ctx->to_fsync_list, ©); - call_count = ctx->fsync_count; - ctx->fsync_count = 0; - } - UNLOCK(&base_inode->lock); - - local->call_count = ++call_count; - - /* Send fsync() on the base shard first */ - anon_fd = fd_ref(local->fd); - STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, - local->xattr_req); - call_count--; - anon_fd = NULL; - - list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) { - list_del_init(&iter->to_fsync_list); - fsync_count = 0; - shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); - GF_ASSERT(fsync_count > 0); - anon_fd = fd_anonymous(iter->inode); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create " - "anon fd to fsync shard"); - shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, - NULL, NULL, NULL); - continue; - } + local->call_count = ++call_count; - ret = fd_ctx_set(anon_fd, this, fsync_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, - "Failed to set fd " - "ctx for shard inode gfid=%s", - uuid_utoa(iter->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, - NULL, NULL, NULL); - continue; - } + /* Send fsync() on the base shard first */ + anon_fd = fd_ref(local->fd); STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, local->xattr_req); call_count--; - } + anon_fd = NULL; - return 0; + list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) + { + list_del_init(&iter->to_fsync_list); + fsync_count = 0; + shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); + GF_ASSERT(fsync_count > 0); + anon_fd = fd_anonymous(iter->inode); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, + SHARD_MSG_MEMALLOC_FAILED, + "Failed to create " + "anon fd to fsync shard"); + shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, NULL, NULL); + continue; + } + + ret = fd_ctx_set(anon_fd, this, fsync_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, + "Failed to set fd " + "ctx for shard inode gfid=%s", + uuid_utoa(iter->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, NULL, NULL); + continue; + } + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + anon_fd, local->datasync, local->xattr_req); + call_count--; + } + + return 0; } -int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, - dict_t *xdata) { - int ret = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int +shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + int ret = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa(fd->inode->gfid)); - goto err; - } + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto err; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); - return 0; - } + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + return 0; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!this->itable) + this->itable = fd->inode->table; - local = mem_get0(this->local_pool); - if (!local) - goto err; + local = mem_get0(this->local_pool); + if (!local) + goto err; - frame->local = local; + frame->local = local; - local->fd = fd_ref(fd); - local->fop = GF_FOP_FSYNC; - local->datasync = datasync; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + local->fd = fd_ref(fd); + local->fop = GF_FOP_FSYNC; + local->datasync = datasync; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_fsync_handler); - return 0; + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_fsync_handler); + return 0; err: - shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); + return 0; } -int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, - gf_dirent_t *orig_entries, dict_t *xdata) { - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - shard_local_t *local = NULL; +int +shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, gf_dirent_t *orig_entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { - list_del_init(&entry->list); - list_add_tail(&entry->list, &local->entries_head.list); + list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) + { + list_del_init(&entry->list); + list_add_tail(&entry->list, &local->entries_head.list); - if (!entry->dict) - continue; + if (!entry->dict) + continue; - if (IA_ISDIR(entry->d_stat.ia_type)) - continue; + if (IA_ISDIR(entry->d_stat.ia_type)) + continue; - if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) - shard_modify_size_and_block_count(&entry->d_stat, entry->dict); - if (!entry->inode) - continue; + if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) + shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + if (!entry->inode) + continue; - shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); - } - local->op_ret += op_ret; + shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); + } + local->op_ret += op_ret; unwind: - if (local->fop == GF_FOP_READDIR) - SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, - &local->entries_head, xdata); - else - SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, - xdata); - return 0; + if (local->fop == GF_FOP_READDIR) + SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, + &local->entries_head, xdata); + else + SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, + &local->entries_head, xdata); + return 0; } -int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - gf_dirent_t *orig_entries, dict_t *xdata) { - fd_t *fd = NULL; - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - shard_local_t *local = NULL; - gf_boolean_t last_entry = _gf_false; +int32_t +shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, + dict_t *xdata) +{ + fd_t *fd = NULL; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + shard_local_t *local = NULL; + gf_boolean_t last_entry = _gf_false; - local = frame->local; - fd = local->fd; + local = frame->local; + fd = local->fd; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { - if (last_entry) - last_entry = _gf_false; + list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) + { + if (last_entry) + last_entry = _gf_false; + + if (__is_root_gfid(fd->inode->gfid) && + !(strcmp(entry->d_name, GF_SHARD_DIR))) { + local->offset = entry->d_off; + op_ret--; + last_entry = _gf_true; + continue; + } - if (__is_root_gfid(fd->inode->gfid) && - !(strcmp(entry->d_name, GF_SHARD_DIR))) { - local->offset = entry->d_off; - op_ret--; - last_entry = _gf_true; - continue; - } + list_del_init(&entry->list); + list_add_tail(&entry->list, &local->entries_head.list); - list_del_init(&entry->list); - list_add_tail(&entry->list, &local->entries_head.list); + if (!entry->dict) + continue; - if (!entry->dict) - continue; + if (IA_ISDIR(entry->d_stat.ia_type)) + continue; - if (IA_ISDIR(entry->d_stat.ia_type)) - continue; + if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && + frame->root->pid != GF_CLIENT_PID_GSYNCD) + shard_modify_size_and_block_count(&entry->d_stat, entry->dict); - if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && - frame->root->pid != GF_CLIENT_PID_GSYNCD) - shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + if (!entry->inode) + continue; - if (!entry->inode) - continue; + shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); + } - shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); - } + local->op_ret = op_ret; - local->op_ret = op_ret; + if (last_entry) { + if (local->fop == GF_FOP_READDIR) + STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, + local->fd, local->readdir_size, local->offset, + local->xattr_req); + else + STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + local->fd, local->readdir_size, local->offset, + local->xattr_req); + return 0; + } - if (last_entry) { +unwind: if (local->fop == GF_FOP_READDIR) - STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, local->fd, - local->readdir_size, local->offset, local->xattr_req); + SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, + &local->entries_head, xdata); else - STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, local->fd, - local->readdir_size, local->offset, local->xattr_req); + SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, + &local->entries_head, xdata); return 0; - } +} -unwind: - if (local->fop == GF_FOP_READDIR) - SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head, - xdata); - else - SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, - xdata); - return 0; -} - -int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, int whichop, dict_t *xdata) { - int ret = 0; - shard_local_t *local = NULL; - - local = mem_get0(this->local_pool); - if (!local) { - goto err; - } - - frame->local = local; - - local->fd = fd_ref(fd); - local->fop = whichop; - local->readdir_size = size; - INIT_LIST_HEAD(&local->entries_head.list); - local->list_inited = _gf_true; - - if (whichop == GF_FOP_READDIR) { - STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); - } else { - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, - local, err); - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, - "Failed to set " - "dict value: key:%s, directory gfid=%s", - GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); - goto err; +int +shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, int whichop, dict_t *xdata) +{ + int ret = 0; + shard_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (!local) { + goto err; } - STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, offset, - local->xattr_req); - } + frame->local = local; + + local->fd = fd_ref(fd); + local->fop = whichop; + local->readdir_size = size; + INIT_LIST_HEAD(&local->entries_head.list); + local->list_inited = _gf_true; + + if (whichop == GF_FOP_READDIR) { + STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); + } else { + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set " + "dict value: key:%s, directory gfid=%s", + GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); + goto err; + } + + STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, + local->xattr_req); + } - return 0; + return 0; err: - STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); - return 0; + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; } -int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, dict_t *xdata) { - shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); - return 0; +int32_t +shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); + return 0; } -int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset, dict_t *xdata) { - shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); - return 0; +int32_t +shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); + return 0; } int32_t @@ -6037,77 +6450,86 @@ shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } -int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) { - if (op_ret < 0) - goto unwind; +int32_t +shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + if (op_ret < 0) + goto unwind; - if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); - } + if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); + } unwind: - SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } -int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) { - int op_errno = EINVAL; +int32_t +shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + int op_errno = EINVAL; - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && - (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { - op_errno = ENODATA; - goto out; - } + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && + (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { + op_errno = ENODATA; + goto out; + } - STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); - return 0; + STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); + return 0; } -int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) { - if (op_ret < 0) - goto unwind; +int32_t +shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) +{ + if (op_ret < 0) + goto unwind; - if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); - } + if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); + } unwind: - SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } -int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) { - int op_errno = EINVAL; +int32_t +shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int op_errno = EINVAL; - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && - (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { - op_errno = ENODATA; - goto out; - } + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && + (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { + op_errno = ENODATA; + goto out; + } - STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - return 0; + STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); + return 0; } -int32_t shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) { +int32_t +shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ int ret = -1; shard_local_t *local = NULL; @@ -6141,8 +6563,9 @@ err: return 0; } -int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame, - xlator_t *this) { +int32_t +shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this) +{ shard_local_t *local = NULL; local = frame->local; @@ -6164,9 +6587,11 @@ int32_t shard_post_lookup_set_xattr_handler(call_frame_t *frame, return 0; } -int32_t shard_common_set_xattr(call_frame_t *frame, xlator_t *this, - glusterfs_fop_t fop, loc_t *loc, fd_t *fd, - dict_t *dict, int32_t flags, dict_t *xdata) { +int32_t +shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop, + loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags, + dict_t *xdata) +{ int ret = -1; int op_errno = ENOMEM; uint64_t block_size = 0; @@ -6249,489 +6674,531 @@ err: return 0; } -int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags, dict_t *xdata) { +int32_t +shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags, xdata); return 0; } -int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags, dict_t *xdata) { +int32_t +shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags, xdata); return 0; } -int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { - shard_local_t *local = NULL; +int +shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) +{ + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->fop == GF_FOP_SETATTR) { - if (local->op_ret >= 0) - shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, - SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - } else if (local->fop == GF_FOP_FSETATTR) { - if (local->op_ret >= 0) - shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, - SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - } + if (local->fop == GF_FOP_SETATTR) { + if (local->op_ret >= 0) + shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, + SHARD_LOOKUP_MASK); + SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } else if (local->fop == GF_FOP_FSETATTR) { + if (local->op_ret >= 0) + shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, + SHARD_LOOKUP_MASK); + SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } - return 0; + return 0; } -int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) { - shard_local_t *local = NULL; +int +shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } - local->prebuf = *prebuf; - if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } - if (xdata) - local->xattr_rsp = dict_ref(xdata); - local->postbuf = *postbuf; - local->postbuf.ia_size = local->prebuf.ia_size; - local->postbuf.ia_blocks = local->prebuf.ia_blocks; + local->prebuf = *prebuf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + if (xdata) + local->xattr_rsp = dict_ref(xdata); + local->postbuf = *postbuf; + local->postbuf.ia_size = local->prebuf.ia_size; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; unwind: - local->handler(frame, this); - return 0; + local->handler(frame, this); + return 0; } -int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int +shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { - STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); - return 0; - } + if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + } - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); - return 0; - } + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + } - local = mem_get0(this->local_pool); - if (!local) - goto err; + local = mem_get0(this->local_pool); + if (!local) + goto err; - frame->local = local; + frame->local = local; - local->handler = shard_post_setattr_handler; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_SETATTR; - loc_copy(&local->loc, loc); + local->handler = shard_post_setattr_handler; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_SETATTR; + loc_copy(&local->loc, loc); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, - local, err); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, + local, err); - STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, - local->xattr_req); - return 0; + STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + local->xattr_req); + return 0; err: - shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); + return 0; } -int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int +shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { - STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); - return 0; - } + if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block size from inode ctx of %s", - uuid_utoa(fd->inode->gfid)); - goto err; - } + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); - return 0; - } + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!this->itable) + this->itable = fd->inode->table; - local = mem_get0(this->local_pool); - if (!local) - goto err; + local = mem_get0(this->local_pool); + if (!local) + goto err; - frame->local = local; + frame->local = local; - local->handler = shard_post_setattr_handler; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_FSETATTR; - local->fd = fd_ref(fd); + local->handler = shard_post_setattr_handler; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_FSETATTR; + local->fd = fd_ref(fd); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, - local, err); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); - STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, - local->xattr_req); - return 0; + STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, + local->xattr_req); + return 0; err: - shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); - return 0; -} - -int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, - glusterfs_fop_t fop, fd_t *fd, - struct iovec *vector, int32_t count, - off_t offset, uint32_t flags, size_t len, - struct iobref *iobref, dict_t *xdata) { - int ret = 0; - int i = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa(fd->inode->gfid)); - goto out; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - /* block_size = 0 means that the file was created before - * sharding was enabled on the volume. - */ - switch (fop) { - case GF_FOP_WRITE: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - fd, vector, count, offset, flags, iobref, xdata); - break; - case GF_FOP_FALLOCATE: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, fd, flags, offset, - len, xdata); - break; - case GF_FOP_ZEROFILL: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->zerofill, fd, offset, len, - xdata); - break; - case GF_FOP_DISCARD: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", fop); - break; - } - return 0; - } - - if (!this->itable) - this->itable = fd->inode->table; - - local = mem_get0(this->local_pool); - if (!local) - goto out; - - frame->local = local; - - ret = syncbarrier_init(&local->barrier); - if (ret) - goto out; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto out; - - if (vector) { - local->vector = iov_dup(vector, count); - if (!local->vector) - goto out; - for (i = 0; i < count; i++) - local->total_size += vector[i].iov_len; - local->count = count; - } else { - local->total_size = len; - } - - local->fop = fop; - local->offset = offset; - local->flags = flags; - if (iobref) - local->iobref = iobref_ref(iobref); - local->fd = fd_ref(fd); - local->block_size = block_size; - local->resolver_base_inode = local->fd->inode; - GF_ATOMIC_INIT(local->delta_blocks, 0); - - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - - shard_lookup_base_file(frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); - return 0; + shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); + return 0; +} + +int +shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, fd_t *fd, + struct iovec *vector, int32_t count, + off_t offset, uint32_t flags, size_t len, + struct iobref *iobref, dict_t *xdata) +{ + int ret = 0; + int i = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + /* block_size = 0 means that the file was created before + * sharding was enabled on the volume. + */ + switch (fop) { + case GF_FOP_WRITE: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, + count, offset, flags, iobref, xdata); + break; + case GF_FOP_FALLOCATE: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, flags, + offset, len, xdata); + break; + case GF_FOP_ZEROFILL: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, + len, xdata); + break; + case GF_FOP_DISCARD: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, + len, xdata); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto out; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto out; + + if (vector) { + local->vector = iov_dup(vector, count); + if (!local->vector) + goto out; + for (i = 0; i < count; i++) + local->total_size += vector[i].iov_len; + local->count = count; + } else { + local->total_size = len; + } + + local->fop = fop; + local->offset = offset; + local->flags = flags; + if (iobref) + local->iobref = iobref_ref(iobref); + local->fd = fd_ref(fd); + local->block_size = block_size; + local->resolver_base_inode = local->fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + + shard_lookup_base_file(frame, this, &local->loc, + shard_common_inode_write_post_lookup_handler); + return 0; out: - shard_common_failure_unwind(fop, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(fop, frame, -1, ENOMEM); + return 0; } -int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) { - shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, - offset, flags, 0, iobref, xdata); - return 0; +int +shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, + offset, flags, 0, iobref, xdata); + return 0; } -int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t keep_size, off_t offset, size_t len, - dict_t *xdata) { - if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && - (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) - goto out; +int +shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +{ + if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && + (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) + goto out; - shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, - offset, keep_size, len, NULL, xdata); - return 0; + shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, + offset, keep_size, len, NULL, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); - return 0; + shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); + return 0; } -int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) { - shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, - offset, 0, len, NULL, xdata); - return 0; +int +shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, + offset, 0, len, NULL, xdata); + return 0; } -int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) { - shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, - offset, 0, len, NULL, xdata); - return 0; +int +shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, + offset, 0, len, NULL, xdata); + return 0; } -int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) { - /* TBD */ - gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, - "seek called on %s.", uuid_utoa(fd->inode->gfid)); - shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); - return 0; +int32_t +shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ + /* TBD */ + gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, + "seek called on %s.", uuid_utoa(fd->inode->gfid)); + shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); + return 0; } -int32_t mem_acct_init(xlator_t *this) { - int ret = -1; +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; - if (!this) - return ret; + if (!this) + return ret; - ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); - if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, - "Memory accounting init" - "failed"); - return ret; - } + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, + "Memory accounting init" + "failed"); + return ret; + } - return ret; + return ret; } -int init(xlator_t *this) { - int ret = -1; - shard_priv_t *priv = NULL; +int +init(xlator_t *this) +{ + int ret = -1; + shard_priv_t *priv = NULL; + + if (!this) { + gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, + "this is NULL. init() failed"); + return -1; + } - if (!this) { - gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, - "this is NULL. init() failed"); - return -1; - } - - if (!this->parents) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, - "Dangling volume. Check volfile"); - goto out; - } - - if (!this->children || this->children->next) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, - "shard not configured with exactly one sub-volume. " - "Check volfile"); - goto out; - } - - priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); - if (!priv) - goto out; - - GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); - - GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); - - GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); - - this->local_pool = mem_pool_new(shard_local_t, 128); - if (!this->local_pool) { - ret = -1; - goto out; - } - gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); - gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); - - this->private = priv; - LOCK_INIT(&priv->lock); - INIT_LIST_HEAD(&priv->ilist_head); - ret = 0; + if (!this->parents) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, + "Dangling volume. Check volfile"); + goto out; + } + + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, + "shard not configured with exactly one sub-volume. " + "Check volfile"); + goto out; + } + + priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); + if (!priv) + goto out; + + GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + + GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + + GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + + this->local_pool = mem_pool_new(shard_local_t, 128); + if (!this->local_pool) { + ret = -1; + goto out; + } + gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); + gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + + this->private = priv; + LOCK_INIT(&priv->lock); + INIT_LIST_HEAD(&priv->ilist_head); + ret = 0; out: - if (ret) { - GF_FREE(priv); - mem_pool_destroy(this->local_pool); - } + if (ret) { + GF_FREE(priv); + mem_pool_destroy(this->local_pool); + } - return ret; + return ret; } -void fini(xlator_t *this) { - shard_priv_t *priv = NULL; +void +fini(xlator_t *this) +{ + shard_priv_t *priv = NULL; - GF_VALIDATE_OR_GOTO("shard", this, out); + GF_VALIDATE_OR_GOTO("shard", this, out); - mem_pool_destroy(this->local_pool); - this->local_pool = NULL; + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; - priv = this->private; - if (!priv) - goto out; + priv = this->private; + if (!priv) + goto out; - this->private = NULL; - LOCK_DESTROY(&priv->lock); - GF_FREE(priv); + this->private = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); out: - return; + return; } -int reconfigure(xlator_t *this, dict_t *options) { - int ret = -1; - shard_priv_t *priv = NULL; +int +reconfigure(xlator_t *this, dict_t *options) +{ + int ret = -1; + shard_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); - GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, - out); - ret = 0; + GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, + uint32, out); + ret = 0; out: - return ret; + return ret; } -int shard_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; - shard_priv_t *priv = NULL; +int +shard_forget(xlator_t *this, inode_t *inode) +{ + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; + shard_priv_t *priv = NULL; - priv = this->private; - if (!priv) - return 0; + priv = this->private; + if (!priv) + return 0; - inode_ctx_del(inode, this, &ctx_uint); - if (!ctx_uint) - return 0; + inode_ctx_del(inode, this, &ctx_uint); + if (!ctx_uint) + return 0; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - /* When LRU limit reaches inode will be forcefully removed from the - * table, inode needs to be removed from LRU of shard as well. - */ - if (!list_empty(&ctx->ilist)) { - LOCK(&priv->lock); - { - list_del_init(&ctx->ilist); - priv->inode_count--; + /* When LRU limit reaches inode will be forcefully removed from the + * table, inode needs to be removed from LRU of shard as well. + */ + if (!list_empty(&ctx->ilist)) { + LOCK(&priv->lock); + { + list_del_init(&ctx->ilist); + priv->inode_count--; + } + UNLOCK(&priv->lock); } - UNLOCK(&priv->lock); - } - GF_FREE(ctx); + GF_FREE(ctx); - return 0; + return 0; } -int shard_release(xlator_t *this, fd_t *fd) { - /* TBD */ - return 0; +int +shard_release(xlator_t *this, fd_t *fd) +{ + /* TBD */ + return 0; } -int shard_priv_dump(xlator_t *this) { - shard_priv_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = { - 0, - }; - char *str = NULL; +int +shard_priv_dump(xlator_t *this) +{ + shard_priv_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char *str = NULL; - priv = this->private; + priv = this->private; - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); - gf_proc_dump_add_section("%s", key_prefix); - str = gf_uint64_2human_readable(priv->block_size); - gf_proc_dump_write("shard-block-size", "%s", str); - gf_proc_dump_write("inode-count", "%d", priv->inode_count); - gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); - gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section("%s", key_prefix); + str = gf_uint64_2human_readable(priv->block_size); + gf_proc_dump_write("shard-block-size", "%s", str); + gf_proc_dump_write("inode-count", "%d", priv->inode_count); + gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); + gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); - GF_FREE(str); + GF_FREE(str); - return 0; + return 0; } -int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } +int +shard_releasedir(xlator_t *this, fd_t *fd) +{ + return 0; +} struct xlator_fops fops = { .lookup = shard_lookup, -- 1.8.3.1