f338ef
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
f338ef
From: Kotresh HR <khiremat@redhat.com>
f338ef
Date: Mon, 29 Jul 2019 16:22:10 +0530
f338ef
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
f338ef
f338ef
Problem:
f338ef
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
f338ef
if it's not present. In a multi client scenario, there is a race
f338ef
which results in updating the ctime xattr to older value.
f338ef
f338ef
e.g. Let c1 and c2 be two clients and file1 be the file which
f338ef
doesn't have the ctime xattr. Let the ctime of file1 be t1.
f338ef
(from backend, ctime heals time attributes from backend when not present).
f338ef
f338ef
Now following operations are done on mount
f338ef
c1 -> ls -l /mnt/file1  |   c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
f338ef
f338ef
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
f338ef
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
f338ef
appends the file before c1 heals it, it sets the time to 't1' and updates
f338ef
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
f338ef
which is incorrect.
f338ef
f338ef
Solution:
f338ef
Compare the times during heal and only update the larger time. This is the
f338ef
general approach used in ctime feature but got missed with healing legacy
f338ef
files.
f338ef
f338ef
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
f338ef
f338ef
>fixes: bz#1734299
f338ef
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
f338ef
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
f338ef
f338ef
BUG: 1734305
f338ef
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
f338ef
Signed-off-by: Kotresh HR <khiremat@redhat.com>
f338ef
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
f338ef
Tested-by: RHGS Build Bot <nigelb@redhat.com>
f338ef
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
f338ef
---
f338ef
 xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
f338ef
 1 file changed, 58 insertions(+), 18 deletions(-)
f338ef
f338ef
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
f338ef
index 647c0bb..57791fa 100644
f338ef
--- a/xlators/storage/posix/src/posix-metadata.c
f338ef
+++ b/xlators/storage/posix/src/posix-metadata.c
f338ef
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
f338ef
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
f338ef
 {
f338ef
     posix_mdata_t *mdata = NULL;
f338ef
+    posix_mdata_t imdata = {
f338ef
+        0,
f338ef
+    };
f338ef
     int ret = 0;
f338ef
+    gf_boolean_t mdata_already_set = _gf_false;
f338ef
 
f338ef
     GF_VALIDATE_OR_GOTO("posix", this, out);
f338ef
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
f338ef
 
f338ef
     LOCK(&inode->lock);
f338ef
     {
f338ef
-        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
f338ef
-        if (!mdata) {
f338ef
-            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
f338ef
-                   "Could not allocate mdata. gfid: %s",
f338ef
-                   uuid_utoa(inode->gfid));
f338ef
-            ret = -1;
f338ef
-            *op_errno = ENOMEM;
f338ef
-            goto unlock;
f338ef
-        }
f338ef
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
f338ef
+        if (ret == 0 && mdata) {
f338ef
+            mdata_already_set = _gf_true;
f338ef
+        } else if (ret == -1 || !mdata) {
f338ef
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
f338ef
+            if (!mdata) {
f338ef
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
f338ef
+                       "Could not allocate mdata. gfid: %s",
f338ef
+                       uuid_utoa(inode->gfid));
f338ef
+                ret = -1;
f338ef
+                *op_errno = ENOMEM;
f338ef
+                goto unlock;
f338ef
+            }
f338ef
+
f338ef
+            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
f338ef
+                                          op_errno);
f338ef
+            if (ret == 0) {
f338ef
+                /* Got mdata from disk. This is a race, another client
f338ef
+                 * has healed the xattr during lookup. So set it in inode
f338ef
+                 * ctx */
f338ef
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
f338ef
+                mdata_already_set = _gf_true;
f338ef
+            } else {
f338ef
+                *op_errno = 0;
f338ef
+                mdata->version = 1;
f338ef
+                mdata->flags = 0;
f338ef
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
f338ef
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
f338ef
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
f338ef
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
f338ef
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
f338ef
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
f338ef
 
f338ef
-        mdata->version = 1;
f338ef
-        mdata->flags = 0;
f338ef
-        mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
f338ef
-        mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
f338ef
-        mdata->atime.tv_sec = mdata_iatt->ia_atime;
f338ef
-        mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
f338ef
-        mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
f338ef
-        mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
f338ef
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
f338ef
+            }
f338ef
+        }
f338ef
 
f338ef
-        __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
f338ef
+        if (mdata_already_set) {
f338ef
+            /* Compare and update the larger time */
f338ef
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
f338ef
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
f338ef
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
f338ef
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
f338ef
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
f338ef
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
f338ef
+
f338ef
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
f338ef
+                mdata->ctime = imdata.ctime;
f338ef
+            }
f338ef
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
f338ef
+                mdata->mtime = imdata.mtime;
f338ef
+            }
f338ef
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
f338ef
+                mdata->atime = imdata.atime;
f338ef
+            }
f338ef
+        }
f338ef
 
f338ef
         ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
f338ef
         if (ret) {
f338ef
-- 
f338ef
1.8.3.1
f338ef