50dc83
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
50dc83
From: Kotresh HR <khiremat@redhat.com>
50dc83
Date: Mon, 29 Jul 2019 16:22:10 +0530
50dc83
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
50dc83
50dc83
Problem:
50dc83
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
50dc83
if it's not present. In a multi client scenario, there is a race
50dc83
which results in updating the ctime xattr to older value.
50dc83
50dc83
e.g. Let c1 and c2 be two clients and file1 be the file which
50dc83
doesn't have the ctime xattr. Let the ctime of file1 be t1.
50dc83
(from backend, ctime heals time attributes from backend when not present).
50dc83
50dc83
Now following operations are done on mount
50dc83
c1 -> ls -l /mnt/file1  |   c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
50dc83
50dc83
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
50dc83
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
50dc83
appends the file before c1 heals it, it sets the time to 't1' and updates
50dc83
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
50dc83
which is incorrect.
50dc83
50dc83
Solution:
50dc83
Compare the times during heal and only update the larger time. This is the
50dc83
general approach used in ctime feature but got missed with healing legacy
50dc83
files.
50dc83
50dc83
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
50dc83
50dc83
>fixes: bz#1734299
50dc83
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
50dc83
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
50dc83
50dc83
BUG: 1734305
50dc83
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
50dc83
Signed-off-by: Kotresh HR <khiremat@redhat.com>
50dc83
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
50dc83
Tested-by: RHGS Build Bot <nigelb@redhat.com>
50dc83
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
50dc83
---
50dc83
 xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
50dc83
 1 file changed, 58 insertions(+), 18 deletions(-)
50dc83
50dc83
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
50dc83
index 647c0bb..57791fa 100644
50dc83
--- a/xlators/storage/posix/src/posix-metadata.c
50dc83
+++ b/xlators/storage/posix/src/posix-metadata.c
50dc83
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
50dc83
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
50dc83
 {
50dc83
     posix_mdata_t *mdata = NULL;
50dc83
+    posix_mdata_t imdata = {
50dc83
+        0,
50dc83
+    };
50dc83
     int ret = 0;
50dc83
+    gf_boolean_t mdata_already_set = _gf_false;
50dc83
 
50dc83
     GF_VALIDATE_OR_GOTO("posix", this, out);
50dc83
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
50dc83
 
50dc83
     LOCK(&inode->lock);
50dc83
     {
50dc83
-        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
50dc83
-        if (!mdata) {
50dc83
-            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
50dc83
-                   "Could not allocate mdata. gfid: %s",
50dc83
-                   uuid_utoa(inode->gfid));
50dc83
-            ret = -1;
50dc83
-            *op_errno = ENOMEM;
50dc83
-            goto unlock;
50dc83
-        }
50dc83
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
50dc83
+        if (ret == 0 && mdata) {
50dc83
+            mdata_already_set = _gf_true;
50dc83
+        } else if (ret == -1 || !mdata) {
50dc83
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
50dc83
+            if (!mdata) {
50dc83
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
50dc83
+                       "Could not allocate mdata. gfid: %s",
50dc83
+                       uuid_utoa(inode->gfid));
50dc83
+                ret = -1;
50dc83
+                *op_errno = ENOMEM;
50dc83
+                goto unlock;
50dc83
+            }
50dc83
+
50dc83
+            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
50dc83
+                                          op_errno);
50dc83
+            if (ret == 0) {
50dc83
+                /* Got mdata from disk. This is a race, another client
50dc83
+                 * has healed the xattr during lookup. So set it in inode
50dc83
+                 * ctx */
50dc83
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
50dc83
+                mdata_already_set = _gf_true;
50dc83
+            } else {
50dc83
+                *op_errno = 0;
50dc83
+                mdata->version = 1;
50dc83
+                mdata->flags = 0;
50dc83
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
50dc83
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
50dc83
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
50dc83
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
50dc83
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
50dc83
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
50dc83
 
50dc83
-        mdata->version = 1;
50dc83
-        mdata->flags = 0;
50dc83
-        mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
50dc83
-        mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
50dc83
-        mdata->atime.tv_sec = mdata_iatt->ia_atime;
50dc83
-        mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
50dc83
-        mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
50dc83
-        mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
50dc83
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
50dc83
+            }
50dc83
+        }
50dc83
 
50dc83
-        __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
50dc83
+        if (mdata_already_set) {
50dc83
+            /* Compare and update the larger time */
50dc83
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
50dc83
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
50dc83
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
50dc83
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
50dc83
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
50dc83
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
50dc83
+
50dc83
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
50dc83
+                mdata->ctime = imdata.ctime;
50dc83
+            }
50dc83
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
50dc83
+                mdata->mtime = imdata.mtime;
50dc83
+            }
50dc83
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
50dc83
+                mdata->atime = imdata.atime;
50dc83
+            }
50dc83
+        }
50dc83
 
50dc83
         ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
50dc83
         if (ret) {
50dc83
-- 
50dc83
1.8.3.1
50dc83