17b94a
From cf13847a6341b7519ed0dc51e3b9ecf12444a3e4 Mon Sep 17 00:00:00 2001
17b94a
From: Kotresh HR <khiremat@redhat.com>
17b94a
Date: Mon, 29 Jul 2019 16:22:10 +0530
17b94a
Subject: [PATCH 267/276] posix/ctime: Fix race during lookup ctime xattr heal
17b94a
17b94a
Problem:
17b94a
Ctime heals the ctime xattr ("trusted.glusterfs.mdata") in lookup
17b94a
if it's not present. In a multi client scenario, there is a race
17b94a
which results in updating the ctime xattr to older value.
17b94a
17b94a
e.g. Let c1 and c2 be two clients and file1 be the file which
17b94a
doesn't have the ctime xattr. Let the ctime of file1 be t1.
17b94a
(from backend, ctime heals time attributes from backend when not present).
17b94a
17b94a
Now following operations are done on mount
17b94a
c1 -> ls -l /mnt/file1  |   c2 -> ls -l /mnt/file1;echo "append" >> /mnt/file1;
17b94a
17b94a
The race is that the both c1 and c2 didn't fetch the ctime xattr in lookup,
17b94a
so both of them tries to heal ctime to time 't1'. If c2 wins the race and
17b94a
appends the file before c1 heals it, it sets the time to 't1' and updates
17b94a
it to 't2' (because of append). Now c1 proceeds to heal and sets it to 't1'
17b94a
which is incorrect.
17b94a
17b94a
Solution:
17b94a
Compare the times during heal and only update the larger time. This is the
17b94a
general approach used in ctime feature but got missed with healing legacy
17b94a
files.
17b94a
17b94a
> upstream patch : https://review.gluster.org/#/c/glusterfs/+/23131/
17b94a
17b94a
>fixes: bz#1734299
17b94a
>Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
17b94a
>Signed-off-by: Kotresh HR <khiremat@redhat.com>
17b94a
17b94a
BUG: 1734305
17b94a
Change-Id: I930bda192c64c3d49d0aed431ce23d3bc57e51b7
17b94a
Signed-off-by: Kotresh HR <khiremat@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/177866
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 xlators/storage/posix/src/posix-metadata.c | 76 +++++++++++++++++++++++-------
17b94a
 1 file changed, 58 insertions(+), 18 deletions(-)
17b94a
17b94a
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
17b94a
index 647c0bb..57791fa 100644
17b94a
--- a/xlators/storage/posix/src/posix-metadata.c
17b94a
+++ b/xlators/storage/posix/src/posix-metadata.c
17b94a
@@ -344,33 +344,73 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
17b94a
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
17b94a
 {
17b94a
     posix_mdata_t *mdata = NULL;
17b94a
+    posix_mdata_t imdata = {
17b94a
+        0,
17b94a
+    };
17b94a
     int ret = 0;
17b94a
+    gf_boolean_t mdata_already_set = _gf_false;
17b94a
 
17b94a
     GF_VALIDATE_OR_GOTO("posix", this, out);
17b94a
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
17b94a
 
17b94a
     LOCK(&inode->lock);
17b94a
     {
17b94a
-        mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
17b94a
-        if (!mdata) {
17b94a
-            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
17b94a
-                   "Could not allocate mdata. gfid: %s",
17b94a
-                   uuid_utoa(inode->gfid));
17b94a
-            ret = -1;
17b94a
-            *op_errno = ENOMEM;
17b94a
-            goto unlock;
17b94a
-        }
17b94a
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
17b94a
+        if (ret == 0 && mdata) {
17b94a
+            mdata_already_set = _gf_true;
17b94a
+        } else if (ret == -1 || !mdata) {
17b94a
+            mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
17b94a
+            if (!mdata) {
17b94a
+                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM,
17b94a
+                       "Could not allocate mdata. gfid: %s",
17b94a
+                       uuid_utoa(inode->gfid));
17b94a
+                ret = -1;
17b94a
+                *op_errno = ENOMEM;
17b94a
+                goto unlock;
17b94a
+            }
17b94a
+
17b94a
+            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
17b94a
+                                          op_errno);
17b94a
+            if (ret == 0) {
17b94a
+                /* Got mdata from disk. This is a race, another client
17b94a
+                 * has healed the xattr during lookup. So set it in inode
17b94a
+                 * ctx */
17b94a
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
17b94a
+                mdata_already_set = _gf_true;
17b94a
+            } else {
17b94a
+                *op_errno = 0;
17b94a
+                mdata->version = 1;
17b94a
+                mdata->flags = 0;
17b94a
+                mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
17b94a
+                mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
17b94a
+                mdata->atime.tv_sec = mdata_iatt->ia_atime;
17b94a
+                mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
17b94a
+                mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
17b94a
+                mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
17b94a
 
17b94a
-        mdata->version = 1;
17b94a
-        mdata->flags = 0;
17b94a
-        mdata->ctime.tv_sec = mdata_iatt->ia_ctime;
17b94a
-        mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
17b94a
-        mdata->atime.tv_sec = mdata_iatt->ia_atime;
17b94a
-        mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec;
17b94a
-        mdata->mtime.tv_sec = mdata_iatt->ia_mtime;
17b94a
-        mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
17b94a
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
17b94a
+            }
17b94a
+        }
17b94a
 
17b94a
-        __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
17b94a
+        if (mdata_already_set) {
17b94a
+            /* Compare and update the larger time */
17b94a
+            imdata.ctime.tv_sec = mdata_iatt->ia_ctime;
17b94a
+            imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec;
17b94a
+            imdata.atime.tv_sec = mdata_iatt->ia_atime;
17b94a
+            imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec;
17b94a
+            imdata.mtime.tv_sec = mdata_iatt->ia_mtime;
17b94a
+            imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec;
17b94a
+
17b94a
+            if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) {
17b94a
+                mdata->ctime = imdata.ctime;
17b94a
+            }
17b94a
+            if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) {
17b94a
+                mdata->mtime = imdata.mtime;
17b94a
+            }
17b94a
+            if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) {
17b94a
+                mdata->atime = imdata.atime;
17b94a
+            }
17b94a
+        }
17b94a
 
17b94a
         ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
17b94a
         if (ret) {
17b94a
-- 
17b94a
1.8.3.1
17b94a