233933
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
233933
From: Kotresh HR <khiremat@redhat.com>
233933
Date: Mon, 29 Jul 2019 16:22:10 +0530
233933
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
233933
233933
Problem:
233933
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
233933
if it's not present. In a multi client scenario, there is a race
233933
which results in updating the ctime xattr to older value.
233933
233933
e.g. Let c1 and c2 be two clients and file1 be the file which
233933
doesn't have the ctime xattr. Let the ctime of file1 be t1.
233933
(from backend, ctime heals time attributes from backend when not present).
233933
233933
Now following operations are done on mount
233933
c1 -> ls -l /mnt/file1  |   c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
233933
233933
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
233933
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
233933
appends the file before c1 heals it, it sets the time to 't1' and updates
233933
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
233933
which is incorrect.
233933
233933
Solution:
233933
Compare the times during heal and only update the larger time. This is the
233933
general approach used in ctime feature but got missed with healing legacy
233933
files.
233933
233933
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
233933
233933
>fixes: bz#1734299
233933
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
233933
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
233933
233933
BUG: 1734305
233933
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
233933
Signed-off-by: Kotresh HR <khiremat@redhat.com>
233933
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
233933
Tested-by: RHGS Build Bot <nigelb@redhat.com>
233933
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
233933
---
233933
 xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
233933
 1 file changed, 58 insertions(+), 18 deletions(-)
233933
233933
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
233933
index 647c0bb..57791fa 100644
233933
--- a/xlators/storage/posix/src/posix-metadata.c
233933
+++ b/xlators/storage/posix/src/posix-metadata.c
233933
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
233933
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
233933
 {
233933
     posix_mdata_t *mdata = NULL;
233933
+    posix_mdata_t imdata = {
233933
+        0,
233933
+    };
233933
     int ret = 0;
233933
+    gf_boolean_t mdata_already_set = _gf_false;
233933
 
233933
     GF_VALIDATE_OR_GOTO("posix", this, out);
233933
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
233933
 
233933
     LOCK(&inode->lock);
233933
     {
233933
-        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
233933
-        if (!mdata) {
233933
-            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
233933
-                   "Could not allocate mdata. gfid: %s",
233933
-                   uuid_utoa(inode->gfid));
233933
-            ret = -1;
233933
-            *op_errno = ENOMEM;
233933
-            goto unlock;
233933
-        }
233933
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
233933
+        if (ret == 0 && mdata) {
233933
+            mdata_already_set = _gf_true;
233933
+        } else if (ret == -1 || !mdata) {
233933
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
233933
+            if (!mdata) {
233933
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
233933
+                       "Could not allocate mdata. gfid: %s",
233933
+                       uuid_utoa(inode->gfid));
233933
+                ret = -1;
233933
+                *op_errno = ENOMEM;
233933
+                goto unlock;
233933
+            }
233933
+
233933
+            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
233933
+                                          op_errno);
233933
+            if (ret == 0) {
233933
+                /* Got mdata from disk. This is a race, another client
233933
+                 * has healed the xattr during lookup. So set it in inode
233933
+                 * ctx */
233933
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
233933
+                mdata_already_set = _gf_true;
233933
+            } else {
233933
+                *op_errno = 0;
233933
+                mdata->version = 1;
233933
+                mdata->flags = 0;
233933
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
233933
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
233933
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
233933
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
233933
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
233933
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
233933
 
233933
-        mdata->version = 1;
233933
-        mdata->flags = 0;
233933
-        mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
233933
-        mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
233933
-        mdata->atime.tv_sec = mdata_iatt->ia_atime;
233933
-        mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
233933
-        mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
233933
-        mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
233933
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
233933
+            }
233933
+        }
233933
 
233933
-        __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
233933
+        if (mdata_already_set) {
233933
+            /* Compare and update the larger time */
233933
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
233933
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
233933
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
233933
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
233933
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
233933
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
233933
+
233933
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
233933
+                mdata->ctime = imdata.ctime;
233933
+            }
233933
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
233933
+                mdata->mtime = imdata.mtime;
233933
+            }
233933
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
233933
+                mdata->atime = imdata.atime;
233933
+            }
233933
+        }
233933
 
233933
         ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
233933
         if (ret) {
233933
-- 
233933
1.8.3.1
233933