256ebe
From 2f07d12f902e371d8cb8c76007d558e3a727b56a Mon Sep 17 00:00:00 2001
256ebe
From: Kotresh HR <khiremat@redhat.com>
256ebe
Date: Tue, 9 Apr 2019 18:23:05 +0530
256ebe
Subject: [PATCH 122/124] posix/ctime: Fix stat(time attributes) inconsistency
256ebe
 during readdirp
256ebe
256ebe
Problem:
256ebe
   Creation of tar file on gluster volume throws warning
256ebe
'file changed as we read it'
256ebe
256ebe
Cause:
256ebe
   During readdirp, for few of the files whose inode is not
256ebe
present, time attributes were served from backend. This caused
256ebe
the ctime of few files to be different between before readdir
256ebe
and after readdir by tar.
256ebe
256ebe
Solution:
256ebe
  If ctime feature is enabled and inode is not present, don't
256ebe
serve the time attributes from backend file, serve it from xattr.
256ebe
256ebe
Backport of:
256ebe
 > Patch: https://review.gluster.org/22540
256ebe
 > fixes: bz#1698078
256ebe
 > Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae
256ebe
 > Signed-off-by: Kotresh HR <khiremat@redhat.com>
256ebe
256ebe
BUG: 1699709
256ebe
Change-Id: I427ef865f97399475faf5aa6ca495f7e317603ae
256ebe
Signed-off-by: Kotresh HR <khiremat@redhat.com>
256ebe
Reviewed-on: https://code.engineering.redhat.com/gerrit/168687
256ebe
Tested-by: RHGS Build Bot <nigelb@redhat.com>
256ebe
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
256ebe
---
256ebe
 tests/basic/ctime/ctime-readdir.c          | 29 +++++++++++++++++
256ebe
 tests/basic/ctime/ctime-readdir.t          | 50 ++++++++++++++++++++++++++++++
256ebe
 xlators/storage/posix/src/posix-helpers.c  | 29 +++++++++++------
256ebe
 xlators/storage/posix/src/posix-metadata.c | 41 ++++++++++++++----------
256ebe
 4 files changed, 123 insertions(+), 26 deletions(-)
256ebe
 create mode 100644 tests/basic/ctime/ctime-readdir.c
256ebe
 create mode 100644 tests/basic/ctime/ctime-readdir.t
256ebe
256ebe
diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c
256ebe
new file mode 100644
256ebe
index 0000000..8760db2
256ebe
--- /dev/null
256ebe
+++ b/tests/basic/ctime/ctime-readdir.c
256ebe
@@ -0,0 +1,29 @@
256ebe
+#include <stdio.h>
256ebe
+#include <dirent.h>
256ebe
+#include <string.h>
256ebe
+#include <assert.h>
256ebe
+
256ebe
+int
256ebe
+main(int argc, char **argv)
256ebe
+{
256ebe
+    DIR *dir = NULL;
256ebe
+    struct dirent *entry = NULL;
256ebe
+    int ret = 0;
256ebe
+    char *path = NULL;
256ebe
+
256ebe
+    assert(argc == 2);
256ebe
+    path = argv[1];
256ebe
+
256ebe
+    dir = opendir(path);
256ebe
+    if (!dir) {
256ebe
+        printf("opendir(%s) failed.\n", path);
256ebe
+        return -1;
256ebe
+    }
256ebe
+
256ebe
+    while ((entry = readdir(dir)) != NULL) {
256ebe
+    }
256ebe
+    if (dir)
256ebe
+        closedir(dir);
256ebe
+
256ebe
+    return ret;
256ebe
+}
256ebe
diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t
256ebe
new file mode 100644
256ebe
index 0000000..4564fc1
256ebe
--- /dev/null
256ebe
+++ b/tests/basic/ctime/ctime-readdir.t
256ebe
@@ -0,0 +1,50 @@
256ebe
+#!/bin/bash
256ebe
+
256ebe
+. $(dirname $0)/../../include.rc
256ebe
+. $(dirname $0)/../../volume.rc
256ebe
+
256ebe
+cleanup;
256ebe
+
256ebe
+TEST glusterd
256ebe
+
256ebe
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
256ebe
+TEST $CLI volume set $V0 performance.stat-prefetch on
256ebe
+TEST $CLI volume set $V0 performance.readdir-ahead off
256ebe
+TEST $CLI volume start $V0;
256ebe
+
256ebe
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
256ebe
+
256ebe
+TEST mkdir $M0/dir0
256ebe
+TEST "echo hello_world > $M0/dir0/FILE"
256ebe
+
256ebe
+ctime1=$(stat -c %Z $M0/dir0/FILE)
256ebe
+echo "Mount change time: $ctime1"
256ebe
+
256ebe
+sleep 2
256ebe
+
256ebe
+#Write to back end directly to modify ctime of backend file
256ebe
+TEST "echo write_from_backend >> $B0/brick1/dir0/FILE"
256ebe
+TEST "echo write_from_backend >> $B0/brick2/dir0/FILE"
256ebe
+TEST "echo write_from_backend >> $B0/brick3/dir0/FILE"
256ebe
+echo "Backend change time"
256ebe
+echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)"
256ebe
+echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)"
256ebe
+echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)"
256ebe
+
256ebe
+#Stop and start to hit the case of no inode for readdir
256ebe
+TEST umount $M0
256ebe
+TEST $CLI volume stop $V0
256ebe
+TEST $CLI volume start $V0
256ebe
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
256ebe
+
256ebe
+TEST build_tester $(dirname $0)/ctime-readdir.c
256ebe
+
256ebe
+#Do readdir
256ebe
+TEST ./$(dirname $0)/ctime-readdir $M0/dir0
256ebe
+
256ebe
+EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE
256ebe
+echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)"
256ebe
+
256ebe
+cleanup_tester $(dirname $0)/ctime-readdir
256ebe
+
256ebe
+cleanup;
256ebe
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
256ebe
index 193afc5..37e33a9 100644
256ebe
--- a/xlators/storage/posix/src/posix-helpers.c
256ebe
+++ b/xlators/storage/posix/src/posix-helpers.c
256ebe
@@ -832,17 +832,26 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path,
256ebe
 
256ebe
     iatt_from_stat(&stbuf, &lstatbuf);
256ebe
 
256ebe
-    if (inode && priv->ctime) {
256ebe
-        if (!inode_locked) {
256ebe
-            ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
256ebe
+    if (priv->ctime) {
256ebe
+        if (inode) {
256ebe
+            if (!inode_locked) {
256ebe
+                ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
256ebe
+            } else {
256ebe
+                ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
256ebe
+            }
256ebe
+            if (ret) {
256ebe
+                gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
256ebe
+                       "posix get mdata failed on gfid: %s",
256ebe
+                       uuid_utoa(inode->gfid));
256ebe
+                goto out;
256ebe
+            }
256ebe
         } else {
256ebe
-            ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf);
256ebe
-        }
256ebe
-        if (ret) {
256ebe
-            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
256ebe
-                   "posix get mdata failed on gfid: %s",
256ebe
-                   uuid_utoa(inode->gfid));
256ebe
-            goto out;
256ebe
+            ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf);
256ebe
+            if (ret) {
256ebe
+                gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED,
256ebe
+                       "posix get mdata failed on path: %s", path);
256ebe
+                goto out;
256ebe
+            }
256ebe
         }
256ebe
     }
256ebe
 
256ebe
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
256ebe
index 0ea9099..7ff5225 100644
256ebe
--- a/xlators/storage/posix/src/posix-metadata.c
256ebe
+++ b/xlators/storage/posix/src/posix-metadata.c
256ebe
@@ -79,6 +79,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
256ebe
         fd_based_fop = _gf_true;
256ebe
     }
256ebe
     if (!(fd_based_fop || real_path_arg)) {
256ebe
+        GF_VALIDATE_OR_GOTO(this->name, inode, out);
256ebe
         MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL);
256ebe
         if (!real_path) {
256ebe
             uuid_utoa_r(inode->gfid, gfid_str);
256ebe
@@ -114,14 +115,14 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
256ebe
                          key,
256ebe
                          real_path ? real_path
256ebe
                                    : (real_path_arg ? real_path_arg : "null"),
256ebe
-                         uuid_utoa(inode->gfid));
256ebe
+                         inode ? uuid_utoa(inode->gfid) : "null");
256ebe
         } else {
256ebe
             gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED,
256ebe
                    "getxattr failed"
256ebe
                    " on %s gfid: %s key: %s ",
256ebe
                    real_path ? real_path
256ebe
                              : (real_path_arg ? real_path_arg : "null"),
256ebe
-                   uuid_utoa(inode->gfid), key);
256ebe
+                   inode ? uuid_utoa(inode->gfid) : "null", key);
256ebe
         }
256ebe
         op_ret = -1;
256ebe
         goto out;
256ebe
@@ -148,7 +149,7 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
256ebe
                "getxattr failed on "
256ebe
                " on %s gfid: %s key: %s ",
256ebe
                real_path ? real_path : (real_path_arg ? real_path_arg : "null"),
256ebe
-               uuid_utoa(inode->gfid), key);
256ebe
+               inode ? uuid_utoa(inode->gfid) : "null", key);
256ebe
         goto out;
256ebe
     }
256ebe
 
256ebe
@@ -233,9 +234,14 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
256ebe
     int ret = -1;
256ebe
     int op_errno = 0;
256ebe
 
256ebe
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
256ebe
+    /* Handle readdirp: inode might be null, time attributes should be served
256ebe
+     * from xattr not from backend's file attributes */
256ebe
+    if (inode) {
256ebe
+        ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
256ebe
+    } else {
256ebe
+        ret = -1;
256ebe
+    }
256ebe
 
256ebe
-    ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata);
256ebe
     if (ret == -1 || !mdata) {
256ebe
         mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr);
256ebe
         if (!mdata) {
256ebe
@@ -251,7 +257,9 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
256ebe
              * is hit when in-memory status is lost due to brick
256ebe
              * down scenario
256ebe
              */
256ebe
-            __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
256ebe
+            if (inode) {
256ebe
+                __inode_ctx_set1(inode, this, (uint64_t *)&mdata);
256ebe
+            }
256ebe
         } else {
256ebe
             /* Failed to get mdata from disk, xattr missing.
256ebe
              * This happens on two cases.
256ebe
@@ -278,7 +286,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
256ebe
                  */
256ebe
                 gf_msg(this->name, GF_LOG_WARNING, op_errno,
256ebe
                        P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ",
256ebe
-                       real_path ? real_path : "null", uuid_utoa(inode->gfid),
256ebe
+                       real_path ? real_path : "null",
256ebe
+                       inode ? uuid_utoa(inode->gfid) : "null",
256ebe
                        GF_XATTR_MDATA_KEY);
256ebe
                 GF_FREE(mdata);
256ebe
                 ret = 0;
256ebe
@@ -297,6 +306,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd,
256ebe
         stbuf->ia_atime = mdata->atime.tv_sec;
256ebe
         stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
256ebe
     }
256ebe
+    /* Not set in inode context, hence free mdata */
256ebe
+    if (!inode) {
256ebe
+        GF_FREE(mdata);
256ebe
+    }
256ebe
 
256ebe
 out:
256ebe
     return ret;
256ebe
@@ -416,6 +429,11 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
256ebe
             }
256ebe
         }
256ebe
 
256ebe
+        if ((flag->ctime == 0) && (flag->mtime == 0) && (flag->atime == 0)) {
256ebe
+            ret = 0;
256ebe
+            goto unlock;
256ebe
+        }
256ebe
+
256ebe
         /* Earlier, mdata was updated only if the existing time is less
256ebe
          * than the time to be updated. This would fail the scenarios
256ebe
          * where mtime can be set to any time using the syscall. Hence
256ebe
@@ -486,7 +504,6 @@ out:
256ebe
         stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
256ebe
     }
256ebe
 
256ebe
-
256ebe
     return ret;
256ebe
 }
256ebe
 
256ebe
@@ -604,10 +621,6 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
256ebe
 
256ebe
     if (priv->ctime) {
256ebe
         (void)posix_get_mdata_flag(frame->root->flags, &flag;;
256ebe
-        if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
256ebe
-            goto out;
256ebe
-        }
256ebe
-
256ebe
         if (frame->root->ctime.tv_sec == 0) {
256ebe
             gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
256ebe
                    "posix set mdata failed, No ctime : %s gfid:%s", real_path,
256ebe
@@ -643,9 +656,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
256ebe
 
256ebe
     if (inode && priv->ctime) {
256ebe
         (void)posix_get_parent_mdata_flag(frame->root->flags, &flag;;
256ebe
-        if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
256ebe
-            goto out;
256ebe
-        }
256ebe
         ret = posix_set_mdata_xattr(this, real_path, fd, inode,
256ebe
                                     &frame->root->ctime, stbuf, &flag,
256ebe
                                     _gf_false);
256ebe
@@ -655,7 +665,6 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
256ebe
                    uuid_utoa(inode->gfid));
256ebe
         }
256ebe
     }
256ebe
-out:
256ebe
     return;
256ebe
 }
256ebe
 
256ebe
-- 
256ebe
1.8.3.1
256ebe