From 7afb6d637cf5a27c7c62cd0deefc235a06d1d790 Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Mon, 10 Jul 2017 09:38:54 +0530 Subject: [PATCH 560/566] cluster/dht: Fix fd check race There is another race between the cached subvol being updated in the inode_ctx and the fd being opened on the target. 1. fop1 -> fd1 -> subvol0 2. file migrated from subvol0 to subvol1 and cached_subvol changed to subvol1 in inode_ctx 3. fop2 -> fd1 -> subvol1 [takes new cached subvol] 4. fop2 -> checks fd ctx (fd not open on subvol1) -> opens fd1 on subvol1 5. fop1 -> checks fd ctx (fd not open on subvol0) -> tries to open fd1 on subvol0 -> fails with "No such file or directory". Fix: If dht_fd_open_on_dst fails with ENOENT or ESTALE, wind to old subvol and let the phase1/phase2 checks handle it. > BUG: 1465075 > Signed-off-by: N Balachandran > Reviewed-on: https://review.gluster.org/17731 > Smoke: Gluster Build System > CentOS-regression: Gluster Build System > Reviewed-by: Raghavendra G > Reviewed-by: Amar Tumballi Change-Id: I34f8011574a8b72e3bcfe03b0cc4f024b352f225 BUG: 1463907 Signed-off-by: N Balachandran Reviewed-on: https://code.engineering.redhat.com/gerrit/111949 Reviewed-by: Atin Mukherjee --- xlators/cluster/dht/src/dht-helper.c | 26 +++++++++++++++++ xlators/cluster/dht/src/dht-messages.h | 51 +++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 50cdb83..6bad5db 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -288,10 +288,12 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, glusterfs_fop_t fop = 0; dht_local_t *local = NULL; xlator_t *subvol = NULL; + xlator_t *this = NULL; fd_t *fd = NULL; int op_errno = -1; local = frame->local; + this = frame->this; fop = local->fop; subvol = local->cached_subvol; fd = local->fd; @@ -380,6 +382,11 @@ 
dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, break; default: + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", + fd, uuid_utoa (fd->inode->gfid), + subvol->name); break; } @@ -440,6 +447,11 @@ handle_err: break; default: + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", + fd, uuid_utoa (fd->inode->gfid), + subvol->name); break; } @@ -503,6 +515,20 @@ dht_check_and_open_fd_on_subvol_task (void *data) " (%p, flags=0%o) on file %s @ %s", fd, fd->flags, uuid_utoa (fd->inode->gfid), subvol->name); + /* This can happen if the cached subvol was updated in the + * inode_ctx and the fd was opened on the new cached suvol + * after this fop was wound on the old cached subvol. + * As we do not close the fd on the old subvol (a leak) + * don't treat ENOENT as an error and allow the phase1/phase2 + * checks to handle it. + */ + + if ((-ret != ENOENT) && (-ret != ESTALE)) { + local->op_errno = -ret; + ret = -1; + } else { + ret = 0; + } local->op_errno = -ret; ret = -1; diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 30b64eb..b6184eb 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 118 +#define GLFS_DHT_NUM_MESSAGES 125 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -1085,5 +1085,54 @@ */ #define DHT_MSG_DIR_LOOKUP_FAILED (GLFS_DHT_BASE + 118) +/* + * @messageid 109119 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_INODELK_FAILED (GLFS_DHT_BASE + 119) + +/* + * @messageid 109120 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOCK_FRAME_FAILED (GLFS_DHT_BASE + 120) + +/* + * @messageid 109121 + * @diagnosis + * @recommendedaction None + */ +#define 
DHT_MSG_LOCAL_LOCK_INIT_FAILED (GLFS_DHT_BASE + 121) + +/* + * @messageid 109122 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_ENTRYLK_ERROR (GLFS_DHT_BASE + 122) + +/* + * @messageid 109123 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_INODELK_ERROR (GLFS_DHT_BASE + 123) + +/* + * @messageid 109124 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOC_FAILED (GLFS_DHT_BASE + 124) + +/* + * @messageid 109125 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125) + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ -- 1.8.3.1