|
|
50dc83 |
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
|
|
|
50dc83 |
From: Kotresh HR <khiremat@redhat.com>
|
|
|
50dc83 |
Date: Mon, 29 Jul 2019 16:22:10 +0530
|
|
|
50dc83 |
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
|
|
|
50dc83 |
|
|
|
50dc83 |
Problem:
|
|
|
50dc83 |
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
|
|
|
50dc83 |
if it's not present. In a multi client scenario, there is a race
|
|
|
50dc83 |
which results in updating the ctime xattr to older value.
|
|
|
50dc83 |
|
|
|
50dc83 |
e.g. Let c1 and c2 be two clients and file1 be the file which
|
|
|
50dc83 |
doesn't have the ctime xattr. Let the ctime of file1 be t1.
|
|
|
50dc83 |
(from backend, ctime heals time attributes from backend when not present).
|
|
|
50dc83 |
|
|
|
50dc83 |
Now following operations are done on mount
|
|
|
50dc83 |
c1 -> ls -l /mnt/file1 | c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
|
|
|
50dc83 |
|
|
|
50dc83 |
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
|
|
|
50dc83 |
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
|
|
|
50dc83 |
appends the file before c1 heals it, it sets the time to 't1' and updates
|
|
|
50dc83 |
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
|
|
|
50dc83 |
which is incorrect.
|
|
|
50dc83 |
|
|
|
50dc83 |
Solution:
|
|
|
50dc83 |
Compare the times during heal and only update the larger time. This is the
|
|
|
50dc83 |
general approach used in ctime feature but got missed with healing legacy
|
|
|
50dc83 |
files.
|
|
|
50dc83 |
|
|
|
50dc83 |
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
|
|
|
50dc83 |
|
|
|
50dc83 |
>fixes: bz#1734299
|
|
|
50dc83 |
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
|
|
|
50dc83 |
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
|
|
50dc83 |
|
|
|
50dc83 |
BUG: 1734305
|
|
|
50dc83 |
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
|
|
|
50dc83 |
Signed-off-by: Kotresh HR <khiremat@redhat.com>
|
|
|
50dc83 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
|
|
|
50dc83 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
50dc83 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
50dc83 |
---
|
|
|
50dc83 |
xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
|
|
|
50dc83 |
1 file changed, 58 insertions(+), 18 deletions(-)
|
|
|
50dc83 |
|
|
|
50dc83 |
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
|
|
|
50dc83 |
index 647c0bb..57791fa 100644
|
|
|
50dc83 |
--- a/xlators/storage/posix/src/posix-metadata.c
|
|
|
50dc83 |
+++ b/xlators/storage/posix/src/posix-metadata.c
|
|
|
50dc83 |
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
|
|
|
50dc83 |
struct mdata_iatt *mdata_iatt, int *op_errno)
|
|
|
50dc83 |
{
|
|
|
50dc83 |
posix_mdata_t *mdata = NULL;
|
|
|
50dc83 |
+ posix_mdata_t imdata = {
|
|
|
50dc83 |
+ 0,
|
|
|
50dc83 |
+ };
|
|
|
50dc83 |
int ret = 0;
|
|
|
50dc83 |
+ gf_boolean_t mdata_already_set = _gf_false;
|
|
|
50dc83 |
|
|
|
50dc83 |
GF_VALIDATE_OR_GOTO("posix", this, out);
|
|
|
50dc83 |
GF_VALIDATE_OR_GOTO(this->name, inode, out);
|
|
|
50dc83 |
|
|
|
50dc83 |
LOCK(&inode->lock);
|
|
|
50dc83 |
{
|
|
|
50dc83 |
- mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
|
|
|
50dc83 |
- if (!mdata) {
|
|
|
50dc83 |
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
|
|
|
50dc83 |
- "Could not allocate mdata. gfid: %s",
|
|
|
50dc83 |
- uuid_utoa(inode->gfid));
|
|
|
50dc83 |
- ret = -1;
|
|
|
50dc83 |
- *op_errno = ENOMEM;
|
|
|
50dc83 |
- goto unlock;
|
|
|
50dc83 |
- }
|
|
|
50dc83 |
+ ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
|
|
|
50dc83 |
+ if (ret == 0 && mdata) {
|
|
|
50dc83 |
+ mdata_already_set = _gf_true;
|
|
|
50dc83 |
+ } else if (ret == -1 || !mdata) {
|
|
|
50dc83 |
+ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
|
|
|
50dc83 |
+ if (!mdata) {
|
|
|
50dc83 |
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
|
|
|
50dc83 |
+ "Could not allocate mdata. gfid: %s",
|
|
|
50dc83 |
+ uuid_utoa(inode->gfid));
|
|
|
50dc83 |
+ ret = -1;
|
|
|
50dc83 |
+ *op_errno = ENOMEM;
|
|
|
50dc83 |
+ goto unlock;
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
+
|
|
|
50dc83 |
+ ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
|
|
|
50dc83 |
+ op_errno);
|
|
|
50dc83 |
+ if (ret == 0) {
|
|
|
50dc83 |
+ /* Got mdata from disk. This is a race, another client
|
|
|
50dc83 |
+ * has healed the xattr during lookup. So set it in inode
|
|
|
50dc83 |
+ * ctx */
|
|
|
50dc83 |
+ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
|
|
|
50dc83 |
+ mdata_already_set = _gf_true;
|
|
|
50dc83 |
+ } else {
|
|
|
50dc83 |
+ *op_errno = 0;
|
|
|
50dc83 |
+ mdata->version = 1;
|
|
|
50dc83 |
+ mdata->flags = 0;
|
|
|
50dc83 |
+ mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
|
|
|
50dc83 |
+ mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
|
|
|
50dc83 |
+ mdata->atime.tv_sec = mdata_iatt->ia_atime;
|
|
|
50dc83 |
+ mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
|
|
|
50dc83 |
+ mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
|
|
|
50dc83 |
+ mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
|
|
|
50dc83 |
|
|
|
50dc83 |
- mdata->version = 1;
|
|
|
50dc83 |
- mdata->flags = 0;
|
|
|
50dc83 |
- mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
|
|
|
50dc83 |
- mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
|
|
|
50dc83 |
- mdata->atime.tv_sec = mdata_iatt->ia_atime;
|
|
|
50dc83 |
- mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
|
|
|
50dc83 |
- mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
|
|
|
50dc83 |
- mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
|
|
|
50dc83 |
+ __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
|
|
|
50dc83 |
- __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
|
|
|
50dc83 |
+ if (mdata_already_set) {
|
|
|
50dc83 |
+ /* Compare and update the larger time */
|
|
|
50dc83 |
+ imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
|
|
|
50dc83 |
+ imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
|
|
|
50dc83 |
+ imdata.atime.tv_sec = mdata_iatt->ia_atime;
|
|
|
50dc83 |
+ imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
|
|
|
50dc83 |
+ imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
|
|
|
50dc83 |
+ imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
|
|
|
50dc83 |
+
|
|
|
50dc83 |
+ if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
|
|
|
50dc83 |
+ mdata->ctime = imdata.ctime;
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
+ if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
|
|
|
50dc83 |
+ mdata->mtime = imdata.mtime;
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
+ if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
|
|
|
50dc83 |
+ mdata->atime = imdata.atime;
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
+ }
|
|
|
50dc83 |
|
|
|
50dc83 |
ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
|
|
|
50dc83 |
if (ret) {
|
|
|
50dc83 |
--
|
|
|
50dc83 |
1.8.3.1
|
|
|
50dc83 |
|