14f8ab
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
14f8ab
From: Kotresh HR <khiremat@redhat.com>
14f8ab
Date: Mon, 29 Jul 2019 16:22:10 +0530
14f8ab
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
14f8ab
14f8ab
Problem:
14f8ab
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
14f8ab
if it's not present. In a multi client scenario, there is a race
14f8ab
which results in updating the ctime xattr to older value.
14f8ab
14f8ab
e.g. Let c1 and c2 be two clients and file1 be the file which
14f8ab
doesn't have the ctime xattr. Let the ctime of file1 be t1.
14f8ab
(from backend, ctime heals time attributes from backend when not present).
14f8ab
14f8ab
Now following operations are done on mount
14f8ab
c1 -> ls -l /mnt/file1  |   c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
14f8ab
14f8ab
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
14f8ab
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
14f8ab
appends the file before c1 heals it, it sets the time to 't1' and updates
14f8ab
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
14f8ab
which is incorrect.
14f8ab
14f8ab
Solution:
14f8ab
Compare the times during heal and only update the larger time. This is the
14f8ab
general approach used in ctime feature but got missed with healing legacy
14f8ab
files.
14f8ab
14f8ab
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
14f8ab
14f8ab
>fixes: bz#1734299
14f8ab
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
14f8ab
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
14f8ab
14f8ab
BUG: 1734305
14f8ab
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
14f8ab
Signed-off-by: Kotresh HR <khiremat@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
14f8ab
 1 file changed, 58 insertions(+), 18 deletions(-)
14f8ab
14f8ab
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
14f8ab
index 647c0bb..57791fa 100644
14f8ab
--- a/xlators/storage/posix/src/posix-metadata.c
14f8ab
+++ b/xlators/storage/posix/src/posix-metadata.c
14f8ab
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
14f8ab
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
14f8ab
 {
14f8ab
     posix_mdata_t *mdata = NULL;
14f8ab
+    posix_mdata_t imdata = {
14f8ab
+        0,
14f8ab
+    };
14f8ab
     int ret = 0;
14f8ab
+    gf_boolean_t mdata_already_set = _gf_false;
14f8ab
 
14f8ab
     GF_VALIDATE_OR_GOTO("posix", this, out);
14f8ab
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
14f8ab
 
14f8ab
     LOCK(&inode->lock);
14f8ab
     {
14f8ab
-        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
14f8ab
-        if (!mdata) {
14f8ab
-            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
14f8ab
-                   "Could not allocate mdata. gfid: %s",
14f8ab
-                   uuid_utoa(inode->gfid));
14f8ab
-            ret = -1;
14f8ab
-            *op_errno = ENOMEM;
14f8ab
-            goto unlock;
14f8ab
-        }
14f8ab
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
14f8ab
+        if (ret == 0 && mdata) {
14f8ab
+            mdata_already_set = _gf_true;
14f8ab
+        } else if (ret == -1 || !mdata) {
14f8ab
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
14f8ab
+            if (!mdata) {
14f8ab
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
14f8ab
+                       "Could not allocate mdata. gfid: %s",
14f8ab
+                       uuid_utoa(inode->gfid));
14f8ab
+                ret = -1;
14f8ab
+                *op_errno = ENOMEM;
14f8ab
+                goto unlock;
14f8ab
+            }
14f8ab
+
14f8ab
+            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
14f8ab
+                                          op_errno);
14f8ab
+            if (ret == 0) {
14f8ab
+                /* Got mdata from disk. This is a race, another client
14f8ab
+                 * has healed the xattr during lookup. So set it in inode
14f8ab
+                 * ctx */
14f8ab
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
14f8ab
+                mdata_already_set = _gf_true;
14f8ab
+            } else {
14f8ab
+                *op_errno = 0;
14f8ab
+                mdata->version = 1;
14f8ab
+                mdata->flags = 0;
14f8ab
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
14f8ab
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
14f8ab
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
14f8ab
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
14f8ab
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
14f8ab
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
14f8ab
 
14f8ab
-        mdata->version = 1;
14f8ab
-        mdata->flags = 0;
14f8ab
-        mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
14f8ab
-        mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
14f8ab
-        mdata->atime.tv_sec = mdata_iatt->ia_atime;
14f8ab
-        mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
14f8ab
-        mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
14f8ab
-        mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
14f8ab
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
14f8ab
+            }
14f8ab
+        }
14f8ab
 
14f8ab
-        __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
14f8ab
+        if (mdata_already_set) {
14f8ab
+            /* Compare and update the larger time */
14f8ab
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
14f8ab
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
14f8ab
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
14f8ab
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
14f8ab
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
14f8ab
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
14f8ab
+
14f8ab
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
14f8ab
+                mdata->ctime = imdata.ctime;
14f8ab
+            }
14f8ab
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
14f8ab
+                mdata->mtime = imdata.mtime;
14f8ab
+            }
14f8ab
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
14f8ab
+                mdata->atime = imdata.atime;
14f8ab
+            }
14f8ab
+        }
14f8ab
 
14f8ab
         ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
14f8ab
         if (ret) {
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab