74b1de
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
74b1de
From: Kotresh HR <khiremat@redhat.com>
74b1de
Date: Mon, 29 Jul 2019 16:22:10 +0530
74b1de
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
74b1de
74b1de
Problem:
74b1de
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
74b1de
if it's not present. In a multi client scenario, there is a race
74b1de
which results in updating the ctime xattr to older value.
74b1de
74b1de
e.g. Let c1 and c2 be two clients and file1 be the file which
74b1de
doesn't have the ctime xattr. Let the ctime of file1 be t1.
74b1de
(from backend, ctime heals time attributes from backend when not present).
74b1de
74b1de
Now following operations are done on mount
74b1de
c1 -> ls -l /mnt/file1  |   c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
74b1de
74b1de
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
74b1de
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
74b1de
appends the file before c1 heals it, it sets the time to 't1' and updates
74b1de
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
74b1de
which is incorrect.
74b1de
74b1de
Solution:
74b1de
Compare the times during heal and only update the larger time. This is the
74b1de
general approach used in ctime feature but got missed with healing legacy
74b1de
files.
74b1de
74b1de
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
74b1de
74b1de
>fixes: bz#1734299
74b1de
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
74b1de
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
74b1de
74b1de
BUG: 1734305
74b1de
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
74b1de
Signed-off-by: Kotresh HR <khiremat@redhat.com>
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74b1de
---
74b1de
 xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
74b1de
 1 file changed, 58 insertions(+), 18 deletions(-)
74b1de
74b1de
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
74b1de
index 647c0bb..57791fa 100644
74b1de
--- a/xlators/storage/posix/src/posix-metadata.c
74b1de
+++ b/xlators/storage/posix/src/posix-metadata.c
74b1de
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
74b1de
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
74b1de
 {
74b1de
     posix_mdata_t *mdata = NULL;
74b1de
+    posix_mdata_t imdata = {
74b1de
+        0,
74b1de
+    };
74b1de
     int ret = 0;
74b1de
+    gf_boolean_t mdata_already_set = _gf_false;
74b1de
 
74b1de
     GF_VALIDATE_OR_GOTO("posix", this, out);
74b1de
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
74b1de
 
74b1de
     LOCK(&inode->lock);
74b1de
     {
74b1de
-        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
74b1de
-        if (!mdata) {
74b1de
-            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
74b1de
-                   "Could not allocate mdata. gfid: %s",
74b1de
-                   uuid_utoa(inode->gfid));
74b1de
-            ret = -1;
74b1de
-            *op_errno = ENOMEM;
74b1de
-            goto unlock;
74b1de
-        }
74b1de
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
74b1de
+        if (ret == 0 && mdata) {
74b1de
+            mdata_already_set = _gf_true;
74b1de
+        } else if (ret == -1 || !mdata) {
74b1de
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
74b1de
+            if (!mdata) {
74b1de
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
74b1de
+                       "Could not allocate mdata. gfid: %s",
74b1de
+                       uuid_utoa(inode->gfid));
74b1de
+                ret = -1;
74b1de
+                *op_errno = ENOMEM;
74b1de
+                goto unlock;
74b1de
+            }
74b1de
+
74b1de
+            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
74b1de
+                                          op_errno);
74b1de
+            if (ret == 0) {
74b1de
+                /* Got mdata from disk. This is a race, another client
74b1de
+                 * has healed the xattr during lookup. So set it in inode
74b1de
+                 * ctx */
74b1de
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
74b1de
+                mdata_already_set = _gf_true;
74b1de
+            } else {
74b1de
+                *op_errno = 0;
74b1de
+                mdata->version = 1;
74b1de
+                mdata->flags = 0;
74b1de
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
74b1de
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
74b1de
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
74b1de
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
74b1de
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
74b1de
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
74b1de
 
74b1de
-        mdata->version = 1;
74b1de
-        mdata->flags = 0;
74b1de
-        mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
74b1de
-        mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
74b1de
-        mdata->atime.tv_sec = mdata_iatt->ia_atime;
74b1de
-        mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
74b1de
-        mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
74b1de
-        mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
74b1de
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
74b1de
+            }
74b1de
+        }
74b1de
 
74b1de
-        __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
74b1de
+        if (mdata_already_set) {
74b1de
+            /* Compare and update the larger time */
74b1de
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
74b1de
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
74b1de
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
74b1de
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
74b1de
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
74b1de
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
74b1de
+
74b1de
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
74b1de
+                mdata->ctime = imdata.ctime;
74b1de
+            }
74b1de
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
74b1de
+                mdata->mtime = imdata.mtime;
74b1de
+            }
74b1de
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
74b1de
+                mdata->atime = imdata.atime;
74b1de
+            }
74b1de
+        }
74b1de
 
74b1de
         ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
74b1de
         if (ret) {
74b1de
-- 
74b1de
1.8.3.1
74b1de