ca3909
From d5ce2300f77c25b38a076d4dd6a5521e82c56172 Mon Sep 17 00:00:00 2001
ca3909
From: Kotresh HR <khiremat@redhat.com>
ca3909
Date: Mon, 29 Jul 2019 18:30:42 +0530
ca3909
Subject: [PATCH 295/297] ctime/rebalance: Heal ctime xattr on directory during
ca3909
 rebalance
ca3909
ca3909
After add-brick and rebalance, the ctime xattr is not present
ca3909
on rebalanced directories on new brick. This patch fixes the
ca3909
same.
ca3909
ca3909
Note that ctime still doesn't support consistent time across
ca3909
distribute sub-volume.
ca3909
ca3909
This patch also fixes the in-memory inconsistency of time attributes
ca3909
when metadata is self healed.
ca3909
ca3909
Backport of:
ca3909
 > Patch: https://review.gluster.org/23127/
ca3909
 > Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df
ca3909
 > fixes: bz#1734026
ca3909
 > Signed-off-by: Kotresh HR <khiremat@redhat.com>
ca3909
ca3909
Change-Id: Ia20506f1839021bf61d4753191e7dc34b31bb2df
ca3909
BUG: 1728673
ca3909
Signed-off-by: Kotresh HR <khiremat@redhat.com>
ca3909
Reviewed-on: https://code.engineering.redhat.com/gerrit/181105
ca3909
Tested-by: RHGS Build Bot <nigelb@redhat.com>
ca3909
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
ca3909
---
ca3909
 tests/basic/afr/split-brain-healing-ctime.t        | 253 +++++++++++++++++++++
ca3909
 tests/basic/afr/split-brain-healing.t              |   1 +
ca3909
 tests/basic/ctime/ctime-ec-heal.t                  |  71 ++++++
ca3909
 tests/basic/ctime/ctime-ec-rebalance.t             |  44 ++++
ca3909
 tests/basic/ctime/ctime-rep-heal.t                 |  71 ++++++
ca3909
 tests/basic/ctime/ctime-rep-rebalance.t            |  42 ++++
ca3909
 .../bug-1734370-entry-heal-restore-time.t          |  84 +++++++
ca3909
 tests/volume.rc                                    |  15 +-
ca3909
 xlators/cluster/afr/src/afr-self-heal-common.c     |   3 +-
ca3909
 xlators/cluster/afr/src/afr-self-heal-entry.c      |   2 +
ca3909
 xlators/cluster/dht/src/dht-common.c               |   1 +
ca3909
 xlators/cluster/ec/src/ec-heal.c                   |   7 +-
ca3909
 xlators/storage/posix/src/posix-entry-ops.c        |   8 +-
ca3909
 xlators/storage/posix/src/posix-helpers.c          |  31 ++-
ca3909
 xlators/storage/posix/src/posix-inode-fd-ops.c     |  57 ++---
ca3909
 xlators/storage/posix/src/posix-metadata.c         |  65 +++++-
ca3909
 xlators/storage/posix/src/posix-metadata.h         |   7 +
ca3909
 xlators/storage/posix/src/posix.h                  |   5 +-
ca3909
 18 files changed, 714 insertions(+), 53 deletions(-)
ca3909
 create mode 100644 tests/basic/afr/split-brain-healing-ctime.t
ca3909
 create mode 100644 tests/basic/ctime/ctime-ec-heal.t
ca3909
 create mode 100644 tests/basic/ctime/ctime-ec-rebalance.t
ca3909
 create mode 100644 tests/basic/ctime/ctime-rep-heal.t
ca3909
 create mode 100644 tests/basic/ctime/ctime-rep-rebalance.t
ca3909
 create mode 100644 tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
ca3909
ca3909
diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t
ca3909
new file mode 100644
ca3909
index 0000000..1ca18e3
ca3909
--- /dev/null
ca3909
+++ b/tests/basic/afr/split-brain-healing-ctime.t
ca3909
@@ -0,0 +1,253 @@
ca3909
+#!/bin/bash
ca3909
+
ca3909
+#Test the split-brain resolution CLI commands.
ca3909
+. $(dirname $0)/../../include.rc
ca3909
+. $(dirname $0)/../../volume.rc
ca3909
+
ca3909
+function get_replicate_subvol_number {
ca3909
+        local filename=$1
ca3909
+        #get_backend_paths
ca3909
+        if [ -f $B0/${V0}1/$filename ]
ca3909
+        then
ca3909
+                echo 0
ca3909
+        elif [ -f $B0/${V0}3/$filename ]
ca3909
+        then    echo 1
ca3909
+        else
ca3909
+                echo -1
ca3909
+        fi
ca3909
+}
ca3909
+
ca3909
+cleanup;
ca3909
+
ca3909
+AREQUAL_PATH=$(dirname $0)/../../utils
ca3909
+GET_MDATA_PATH=$(dirname $0)/../../utils
ca3909
+CFLAGS=""
ca3909
+test "`uname -s`" != "Linux" && {
ca3909
+    CFLAGS="$CFLAGS -lintl";
ca3909
+}
ca3909
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
ca3909
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
ca3909
+
ca3909
+TEST glusterd
ca3909
+TEST pidof glusterd
ca3909
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
ca3909
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
ca3909
+TEST $CLI volume set $V0 cluster.data-self-heal off
ca3909
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
ca3909
+TEST $CLI volume set $V0 cluster.entry-self-heal off
ca3909
+TEST $CLI volume set $V0 ctime on
ca3909
+TEST $CLI volume start $V0
ca3909
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
ca3909
+
ca3909
+cd $M0
ca3909
+for i in {1..10}
ca3909
+do
ca3909
+        echo "Initial content">>file$i
ca3909
+done
ca3909
+
ca3909
+replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
ca3909
+replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
ca3909
+
ca3909
+############ Create data split-brain in the files. ###########################
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}1
ca3909
+for file in ${!replica_0_files_list[*]}
ca3909
+do
ca3909
+        echo "B1 is down">>${replica_0_files_list[$file]}
ca3909
+done
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}3
ca3909
+for file in ${!replica_1_files_list[*]}
ca3909
+do
ca3909
+        echo "B3 is down">>${replica_1_files_list[$file]}
ca3909
+done
ca3909
+
ca3909
+SMALLER_FILE_SIZE=$(stat -c %s file1)
ca3909
+
ca3909
+TEST $CLI volume start $V0 force
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
ca3909
+
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}2
ca3909
+for file in ${!replica_0_files_list[*]}
ca3909
+do
ca3909
+        echo "B2 is down">>${replica_0_files_list[$file]}
ca3909
+        echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
ca3909
+done
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}4
ca3909
+for file in ${!replica_1_files_list[*]}
ca3909
+do
ca3909
+        echo "B4 is down">>${replica_1_files_list[$file]}
ca3909
+        echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
ca3909
+done
ca3909
+
ca3909
+BIGGER_FILE_SIZE=$(stat -c %s file1)
ca3909
+TEST $CLI volume start $V0 force
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
ca3909
+
ca3909
+
ca3909
+############### Accessing the files should now give EIO. ###############################
ca3909
+TEST ! cat file1
ca3909
+TEST ! cat file2
ca3909
+TEST ! cat file3
ca3909
+TEST ! cat file4
ca3909
+TEST ! cat file5
ca3909
+TEST ! cat file6
ca3909
+TEST ! cat file7
ca3909
+TEST ! cat file8
ca3909
+TEST ! cat file9
ca3909
+TEST ! cat file10
ca3909
+###################
ca3909
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
ca3909
+
ca3909
+################ Heal file1 using the bigger-file option  ##############
ca3909
+$CLI volume heal $V0 split-brain bigger-file /file1
ca3909
+EXPECT "0" echo $?
ca3909
+EXPECT $BIGGER_FILE_SIZE stat -c %s file1
ca3909
+
ca3909
+################ Heal file2 using the bigger-file option and its gfid ##############
ca3909
+subvolume=$(get_replicate_subvol_number file2)
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
ca3909
+fi
ca3909
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
ca3909
+$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
ca3909
+EXPECT "0" echo $?
ca3909
+
ca3909
+################ Heal file3 using the source-brick option  ##############
ca3909
+################ Use the brick having smaller file size as source #######
ca3909
+subvolume=$(get_replicate_subvol_number file3)
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
ca3909
+fi
ca3909
+EXPECT "0" echo $?
ca3909
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3
ca3909
+
ca3909
+################ Heal file4 using the source-brick option and its gfid ##############
ca3909
+################ Use the brick having smaller file size as source #######
ca3909
+subvolume=$(get_replicate_subvol_number file4)
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
ca3909
+        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
ca3909
+        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
ca3909
+fi
ca3909
+EXPECT "0" echo $?
ca3909
+EXPECT $SMALLER_FILE_SIZE stat -c %s file4
ca3909
+
ca3909
+# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed
ca3909
+# as part of metadata heal. So mtime would be same, hence it can't be healed
ca3909
+# using 'latest-mtime' policy, use 'source-brick' option instead.
ca3909
+################ Heal file5 using the source-brick option  ##############
ca3909
+subvolume=$(get_replicate_subvol_number file5)
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5
ca3909
+fi
ca3909
+EXPECT "0" echo $?
ca3909
+
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
ca3909
+        mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
ca3909
+        mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
ca3909
+fi
ca3909
+
ca3909
+#TODO: Do the below comparisons at full sub-second resolution
ca3909
+
ca3909
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
ca3909
+
ca3909
+mtime_mount_after_heal=$(stat -c %Y file5)
ca3909
+
ca3909
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
ca3909
+
ca3909
+################ Heal file6 using the source-brick option and its gfid  ##############
ca3909
+subvolume=$(get_replicate_subvol_number file6)
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
ca3909
+        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
ca3909
+        GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
ca3909
+        $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR
ca3909
+fi
ca3909
+EXPECT "0" echo $?
ca3909
+
ca3909
+if [ $subvolume == 0 ]
ca3909
+then
ca3909
+        mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
ca3909
+        mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
ca3909
+elif [ $subvolume == 1 ]
ca3909
+then
ca3909
+        mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
ca3909
+        mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
ca3909
+fi
ca3909
+
ca3909
+#TODO: Do the below comparisons at full sub-second resolution
ca3909
+
ca3909
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
ca3909
+
ca3909
+mtime_mount_after_heal=$(stat -c %Y file6)
ca3909
+
ca3909
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
ca3909
+
ca3909
+################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
ca3909
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
ca3909
+EXPECT "0" echo $?
ca3909
+
ca3909
+################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
ca3909
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
ca3909
+EXPECT "0" echo $?
ca3909
+
ca3909
+############### Reading the files should now succeed. ###############################
ca3909
+TEST  cat file1
ca3909
+TEST  cat file2
ca3909
+TEST  cat file3
ca3909
+TEST  cat file4
ca3909
+TEST  cat file5
ca3909
+TEST  cat file6
ca3909
+TEST  cat file7
ca3909
+TEST  cat file8
ca3909
+TEST  cat file9
ca3909
+TEST  cat file10
ca3909
+
ca3909
+################ File contents on the bricks must be same. ################################
ca3909
+TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
ca3909
+TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
ca3909
+
ca3909
+############### Trying to heal files not in SB should fail. ###############################
ca3909
+$CLI volume heal $V0 split-brain bigger-file /file1
ca3909
+EXPECT "1" echo $?
ca3909
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
ca3909
+EXPECT "1" echo $?
ca3909
+
ca3909
+cd -
ca3909
+TEST rm $AREQUAL_PATH/arequal-checksum
ca3909
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
ca3909
+cleanup
ca3909
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
ca3909
index 78553e6..315e815 100644
ca3909
--- a/tests/basic/afr/split-brain-healing.t
ca3909
+++ b/tests/basic/afr/split-brain-healing.t
ca3909
@@ -35,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off
ca3909
 TEST $CLI volume set $V0 cluster.data-self-heal off
ca3909
 TEST $CLI volume set $V0 cluster.metadata-self-heal off
ca3909
 TEST $CLI volume set $V0 cluster.entry-self-heal off
ca3909
+TEST $CLI volume set $V0 ctime off
ca3909
 TEST $CLI volume start $V0
ca3909
 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
ca3909
 
ca3909
diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t
ca3909
new file mode 100644
ca3909
index 0000000..1cb4516
ca3909
--- /dev/null
ca3909
+++ b/tests/basic/ctime/ctime-ec-heal.t
ca3909
@@ -0,0 +1,71 @@
ca3909
+#!/bin/bash
ca3909
+#
ca3909
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
ca3909
+#
ca3909
+###
ca3909
+
ca3909
+. $(dirname $0)/../../include.rc
ca3909
+. $(dirname $0)/../../volume.rc
ca3909
+. $(dirname $0)/../../afr.rc
ca3909
+
ca3909
+cleanup
ca3909
+
ca3909
+#Create and start volume
ca3909
+TEST glusterd
ca3909
+TEST pidof glusterd
ca3909
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3}
ca3909
+TEST $CLI volume set $V0 ctime on
ca3909
+TEST $CLI volume start $V0
ca3909
+
ca3909
+#Mount the volume
ca3909
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
ca3909
+
ca3909
+# Create files
ca3909
+mkdir $M0/dir1
ca3909
+echo "Initial content" > $M0/file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
ca3909
+
ca3909
+# Kill brick
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}3
ca3909
+
ca3909
+echo "B3 is down" >> $M0/file1
ca3909
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
ca3909
+echo "Entry heal file" > $M0/entry_heal_file1
ca3909
+mkdir $M0/entry_heal_dir1
ca3909
+
ca3909
+# Check xattr
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+
ca3909
+TEST $CLI volume start $V0 force
ca3909
+$CLI volume heal $V0
ca3909
+
ca3909
+# Check xattr
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+
ca3909
+cleanup;
ca3909
diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t
ca3909
new file mode 100644
ca3909
index 0000000..caccdc1
ca3909
--- /dev/null
ca3909
+++ b/tests/basic/ctime/ctime-ec-rebalance.t
ca3909
@@ -0,0 +1,44 @@
ca3909
+#!/bin/bash
ca3909
+#
ca3909
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
ca3909
+#
ca3909
+###
ca3909
+
ca3909
+. $(dirname $0)/../../include.rc
ca3909
+. $(dirname $0)/../../volume.rc
ca3909
+. $(dirname $0)/../../fallocate.rc
ca3909
+
ca3909
+cleanup
ca3909
+
ca3909
+#Create and start volume
ca3909
+TEST glusterd
ca3909
+TEST pidof glusterd
ca3909
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
ca3909
+TEST $CLI volume set $V0 ctime on
ca3909
+TEST $CLI volume start $V0
ca3909
+
ca3909
+#Mount the volume
ca3909
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
ca3909
+
ca3909
+# Create files
ca3909
+mkdir $M0/dir1
ca3909
+echo "test data" > $M0/dir1/file1
ca3909
+
ca3909
+# Add brick
ca3909
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
ca3909
+
ca3909
+#Trigger rebalance
ca3909
+TEST $CLI volume rebalance $V0 start force
ca3909
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
ca3909
+
ca3909
+#Verify ctime xattr heal on directory
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
ca3909
+
ca3909
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
ca3909
+
ca3909
+cleanup;
ca3909
diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t
ca3909
new file mode 100644
ca3909
index 0000000..ba8b08a
ca3909
--- /dev/null
ca3909
+++ b/tests/basic/ctime/ctime-rep-heal.t
ca3909
@@ -0,0 +1,71 @@
ca3909
+#!/bin/bash
ca3909
+#
ca3909
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
ca3909
+#
ca3909
+###
ca3909
+
ca3909
+. $(dirname $0)/../../include.rc
ca3909
+. $(dirname $0)/../../volume.rc
ca3909
+. $(dirname $0)/../../afr.rc
ca3909
+
ca3909
+cleanup
ca3909
+
ca3909
+#Create and start volume
ca3909
+TEST glusterd
ca3909
+TEST pidof glusterd
ca3909
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
ca3909
+TEST $CLI volume set $V0 ctime on
ca3909
+TEST $CLI volume start $V0
ca3909
+
ca3909
+#Mount the volume
ca3909
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
ca3909
+
ca3909
+# Create files
ca3909
+mkdir $M0/dir1
ca3909
+echo "Initial content" > $M0/file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
ca3909
+
ca3909
+# Kill brick
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}3
ca3909
+
ca3909
+echo "B3 is down" >> $M0/file1
ca3909
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
ca3909
+echo "Entry heal file" > $M0/entry_heal_file1
ca3909
+mkdir $M0/entry_heal_dir1
ca3909
+
ca3909
+# Check xattr
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+
ca3909
+TEST $CLI volume start $V0 force
ca3909
+$CLI volume heal $V0
ca3909
+
ca3909
+# Check xattr
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
ca3909
+
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
ca3909
+
ca3909
+cleanup;
ca3909
diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t
ca3909
new file mode 100644
ca3909
index 0000000..dd9743e
ca3909
--- /dev/null
ca3909
+++ b/tests/basic/ctime/ctime-rep-rebalance.t
ca3909
@@ -0,0 +1,42 @@
ca3909
+#!/bin/bash
ca3909
+#
ca3909
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
ca3909
+#
ca3909
+###
ca3909
+
ca3909
+. $(dirname $0)/../../include.rc
ca3909
+. $(dirname $0)/../../volume.rc
ca3909
+. $(dirname $0)/../../afr.rc
ca3909
+
ca3909
+cleanup
ca3909
+
ca3909
+#Create and start volume
ca3909
+TEST glusterd
ca3909
+TEST pidof glusterd
ca3909
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
ca3909
+TEST $CLI volume set $V0 ctime on
ca3909
+TEST $CLI volume start $V0
ca3909
+
ca3909
+#Mount the volume
ca3909
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
ca3909
+
ca3909
+# Create files
ca3909
+mkdir $M0/dir1
ca3909
+
ca3909
+# Add brick
ca3909
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
ca3909
+
ca3909
+#Trigger rebalance
ca3909
+TEST $CLI volume rebalance $V0 start force
ca3909
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
ca3909
+
ca3909
+#Verify ctime xattr heal on directory
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
ca3909
+
ca3909
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
ca3909
+
ca3909
+cleanup;
ca3909
diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
ca3909
new file mode 100644
ca3909
index 0000000..298d6ed
ca3909
--- /dev/null
ca3909
+++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
ca3909
@@ -0,0 +1,84 @@
ca3909
+#!/bin/bash
ca3909
+
ca3909
+. $(dirname $0)/../../include.rc
ca3909
+. $(dirname $0)/../../volume.rc
ca3909
+. $(dirname $0)/../../afr.rc
ca3909
+
ca3909
+cleanup;
ca3909
+
ca3909
+function time_stamps_match {
ca3909
+        path=$1
ca3909
+        mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
ca3909
+        atime_source_b0=$(get_atime $B0/${V0}0/$path)
ca3909
+        mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
ca3909
+        atime_source_b2=$(get_atime $B0/${V0}2/$path)
ca3909
+        mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
ca3909
+        atime_sink_b1=$(get_atime $B0/${V0}1/$path)
ca3909
+
ca3909
+        #The same brick must be the source of heal for both atime and mtime.
ca3909
+        if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \
ca3909
+              ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]]
ca3909
+        then
ca3909
+            echo "Y"
ca3909
+        else
ca3909
+            echo "N"
ca3909
+        fi
ca3909
+
ca3909
+}
ca3909
+
ca3909
+# Test that the parent dir's timestamps are restored during entry-heal.
ca3909
+GET_MDATA_PATH=$(dirname $0)/../../utils
ca3909
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
ca3909
+
ca3909
+TEST glusterd;
ca3909
+TEST pidof glusterd;
ca3909
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
ca3909
+TEST $CLI volume set $V0 ctime on
ca3909
+TEST $CLI volume start $V0;
ca3909
+
ca3909
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
ca3909
+
ca3909
+###############################################################################
ca3909
+TEST mkdir $M0/DIR
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}1
ca3909
+TEST touch $M0/DIR/FILE
ca3909
+
ca3909
+TEST $CLI volume start $V0 force
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
ca3909
+TEST $CLI volume heal $V0
ca3909
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
ca3909
+
ca3909
+EXPECT "Y" time_stamps_match DIR
ca3909
+ctime_source1=$(get_ctime $B0/${V0}0/$path)
ca3909
+ctime_source2=$(get_ctime $B0/${V0}2/$path)
ca3909
+ctime_sink=$(get_ctime $B0/${V0}1/$path)
ca3909
+TEST [ $ctime_source1 -eq $ctime_sink ]
ca3909
+TEST [ $ctime_source2 -eq $ctime_sink ]
ca3909
+
ca3909
+###############################################################################
ca3909
+# Repeat the test with ctime feature disabled.
ca3909
+TEST $CLI volume set $V0 features.ctime off
ca3909
+TEST mkdir $M0/DIR2
ca3909
+TEST kill_brick $V0 $H0 $B0/${V0}1
ca3909
+TEST touch $M0/DIR2/FILE
ca3909
+
ca3909
+TEST $CLI volume start $V0 force
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
ca3909
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
ca3909
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
ca3909
+TEST $CLI volume heal $V0
ca3909
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
ca3909
+
ca3909
+EXPECT "Y" time_stamps_match DIR2
ca3909
+
ca3909
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
ca3909
+cleanup;
ca3909
diff --git a/tests/volume.rc b/tests/volume.rc
ca3909
index 76a8fd4..9a002d9 100644
ca3909
--- a/tests/volume.rc
ca3909
+++ b/tests/volume.rc
ca3909
@@ -371,6 +371,19 @@ function get_gfid2path {
ca3909
         getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null
ca3909
 }
ca3909
 
ca3909
+function get_mdata {
ca3909
+        local path=$1
ca3909
+        getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'='
ca3909
+}
ca3909
+
ca3909
+function get_mdata_count {
ca3909
+    getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l
ca3909
+}
ca3909
+
ca3909
+function get_mdata_uniq_count {
ca3909
+    getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
ca3909
+}
ca3909
+
ca3909
 function get_xattr_key {
ca3909
         local key=$1
ca3909
         local path=$2
ca3909
@@ -925,7 +938,7 @@ function get_ctime {
ca3909
     local time=$(get-mdata-xattr -c $1)
ca3909
     if [ $time == "-1" ];
ca3909
     then
ca3909
-        echo $(stat -c %Z $2)
ca3909
+        echo $(stat -c %Z $1)
ca3909
     else
ca3909
         echo $time
ca3909
     fi
ca3909
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
ca3909
index b38085a..81ef38a 100644
ca3909
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
ca3909
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
ca3909
@@ -513,7 +513,8 @@ afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
ca3909
 
ca3909
     AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
ca3909
                &replies[source].poststat,
ca3909
-               (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME), NULL);
ca3909
+               (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
ca3909
+               NULL);
ca3909
 
ca3909
     loc_wipe(&loc);
ca3909
 
ca3909
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
ca3909
index e07b521..35b600f 100644
ca3909
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
ca3909
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
ca3909
@@ -1032,6 +1032,8 @@ unlock:
ca3909
             goto postop_unlock;
ca3909
         }
ca3909
 
ca3909
+        afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
ca3909
+                                  locked_replies);
ca3909
         ret = afr_selfheal_undo_pending(
ca3909
             frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
ca3909
             AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
ca3909
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
ca3909
index 219b072..99cccd6 100644
ca3909
--- a/xlators/cluster/dht/src/dht-common.c
ca3909
+++ b/xlators/cluster/dht/src/dht-common.c
ca3909
@@ -115,6 +115,7 @@ char *xattrs_to_heal[] = {"user.",
ca3909
                           QUOTA_LIMIT_KEY,
ca3909
                           QUOTA_LIMIT_OBJECTS_KEY,
ca3909
                           GF_SELINUX_XATTR_KEY,
ca3909
+                          GF_XATTR_MDATA_KEY,
ca3909
                           NULL};
ca3909
 
ca3909
 char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
ca3909
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
ca3909
index 0f0f398..06a7016 100644
ca3909
--- a/xlators/cluster/ec/src/ec-heal.c
ca3909
+++ b/xlators/cluster/ec/src/ec-heal.c
ca3909
@@ -2301,9 +2301,10 @@ ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
ca3909
 
ca3909
         loc.inode = inode_ref(fd->inode);
ca3909
         gf_uuid_copy(loc.gfid, fd->inode->gfid);
ca3909
-        ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, replies,
ca3909
-                              output, frame, ec->xl, &loc, &source_buf,
ca3909
-                              GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME, NULL);
ca3909
+        ret = cluster_setattr(
ca3909
+            ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
ca3909
+            ec->xl, &loc, &source_buf,
ca3909
+            GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
ca3909
         EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
ca3909
         if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
ca3909
             ret = -ENOTCONN;
ca3909
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
ca3909
index 34ee2b8..283b305 100644
ca3909
--- a/xlators/storage/posix/src/posix-entry-ops.c
ca3909
+++ b/xlators/storage/posix/src/posix-entry-ops.c
ca3909
@@ -500,7 +500,7 @@ post_op:
ca3909
         posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
ca3909
     }
ca3909
 
ca3909
-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
ca3909
+    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
ca3909
     if (op_ret) {
ca3909
         if (errno != EEXIST)
ca3909
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
ca3909
@@ -828,7 +828,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
ca3909
                "setting ACLs on %s failed ", real_path);
ca3909
     }
ca3909
 
ca3909
-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
ca3909
+    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
ca3909
     if (op_ret) {
ca3909
         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
ca3909
                "setting xattrs on %s failed", real_path);
ca3909
@@ -1529,7 +1529,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
ca3909
     }
ca3909
 
ca3909
 ignore:
ca3909
-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
ca3909
+    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
ca3909
     if (op_ret) {
ca3909
         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
ca3909
                "setting xattrs on %s failed ", real_path);
ca3909
@@ -2167,7 +2167,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
ca3909
         posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name);
ca3909
     }
ca3909
 ignore:
ca3909
-    op_ret = posix_entry_create_xattr_set(this, real_path, xdata);
ca3909
+    op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata);
ca3909
     if (op_ret) {
ca3909
         gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
ca3909
                "setting xattrs on %s failed ", real_path);
ca3909
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
ca3909
index d143d4c..6a1a35c 100644
ca3909
--- a/xlators/storage/posix/src/posix-helpers.c
ca3909
+++ b/xlators/storage/posix/src/posix-helpers.c
ca3909
@@ -1188,11 +1188,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key,
ca3909
 #endif
ca3909
 
ca3909
 int
ca3909
-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
ca3909
+posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
ca3909
                   data_t *value, int flags, struct iatt *stbuf)
ca3909
 {
ca3909
     int sys_ret = -1;
ca3909
     int ret = 0;
ca3909
+    int op_errno = 0;
ca3909
+    struct mdata_iatt mdata_iatt = {
ca3909
+        0,
ca3909
+    };
ca3909
 #ifdef GF_DARWIN_HOST_OS
ca3909
     const int error_code = EINVAL;
ca3909
 #else
ca3909
@@ -1216,6 +1220,23 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key,
ca3909
         /* ignore this key value pair */
ca3909
         ret = 0;
ca3909
         goto out;
ca3909
+    } else if (!strcmp(key, GF_XATTR_MDATA_KEY)) {
ca3909
+        /* Exact key match. This is either by rebalance or self heal: create
ca3909
+         * the xattr if it's not present, otherwise compare and update the
ca3909
+         * larger value.
ca3909
+         */
ca3909
+        if (loc == NULL) {
ca3909
+            ret = -EINVAL;
ca3909
+            goto out;
ca3909
+        }
ca3909
+        posix_mdata_iatt_from_disk(&mdata_iatt,
ca3909
+                                   (posix_mdata_disk_t *)value->data);
ca3909
+        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
ca3909
+                                                 &mdata_iatt, &op_errno);
ca3909
+        if (ret != 0) {
ca3909
+            ret = -op_errno;
ca3909
+        }
ca3909
+        goto out;
ca3909
     } else {
ca3909
         sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags);
ca3909
 #ifdef GF_DARWIN_HOST_OS
ca3909
@@ -1810,8 +1831,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
ca3909
         return 0;
ca3909
     }
ca3909
 
ca3909
-    ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE,
ca3909
-                            filler->stbuf);
ca3909
+    ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
ca3909
+                            XATTR_CREATE, filler->stbuf);
ca3909
     if (ret < 0) {
ca3909
         errno = -ret;
ca3909
         return -1;
ca3909
@@ -1820,7 +1841,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
ca3909
 }
ca3909
 
ca3909
 int
ca3909
-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
ca3909
+posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
ca3909
+                             dict_t *dict)
ca3909
 {
ca3909
     int ret = -1;
ca3909
 
ca3909
@@ -1834,6 +1856,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict)
ca3909
     filler.this = this;
ca3909
     filler.real_path = path;
ca3909
     filler.stbuf = NULL;
ca3909
+    filler.loc = loc;
ca3909
 
ca3909
     ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler);
ca3909
 
ca3909
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
ca3909
index e0ea85b..a2a518f 100644
ca3909
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
ca3909
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
ca3909
@@ -429,22 +429,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
ca3909
                                     &frame->root->ctime, stbuf, valid);
ca3909
     }
ca3909
 
ca3909
-    if (valid & GF_SET_ATTR_CTIME && !priv->ctime) {
ca3909
-        /*
ca3909
-         * If ctime is not enabled, we have no means to associate an
ca3909
-         * arbitrary ctime with the file, so as a fallback, we ignore
ca3909
-         * the ctime payload and update the file ctime to current time
ca3909
-         * (which is possible directly with the POSIX API).
ca3909
-         */
ca3909
-        op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL);
ca3909
-        if (op_ret == -1) {
ca3909
-            op_errno = errno;
ca3909
-            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
ca3909
-                   "setattr (utimes) on %s "
ca3909
-                   "failed",
ca3909
-                   real_path);
ca3909
-            goto out;
ca3909
-        }
ca3909
+    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
ca3909
+        posix_update_ctime_in_mdata(this, real_path, -1, loc->inode,
ca3909
+                                    &frame->root->ctime, stbuf, valid);
ca3909
     }
ca3909
 
ca3909
     if (!valid) {
ca3909
@@ -469,14 +456,6 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
ca3909
         goto out;
ca3909
     }
ca3909
 
ca3909
-    if (valid & GF_SET_ATTR_CTIME && priv->ctime) {
ca3909
-        /*
ca3909
-         * If we got ctime payload, we override
ca3909
-         * the ctime of statpost with that.
ca3909
-         */
ca3909
-        statpost.ia_ctime = stbuf->ia_ctime;
ca3909
-        statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec;
ca3909
-    }
ca3909
     posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);
ca3909
 
ca3909
     if (xdata)
ca3909
@@ -592,6 +571,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
ca3909
     struct iatt statpost = {
ca3909
         0,
ca3909
     };
ca3909
+    struct posix_private *priv = NULL;
ca3909
     struct posix_fd *pfd = NULL;
ca3909
     dict_t *xattr_rsp = NULL;
ca3909
     int32_t ret = -1;
ca3909
@@ -604,6 +584,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
ca3909
     VALIDATE_OR_GOTO(this, out);
ca3909
     VALIDATE_OR_GOTO(fd, out);
ca3909
 
ca3909
+    priv = this->private;
ca3909
+    VALIDATE_OR_GOTO(priv, out);
ca3909
+
ca3909
     ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
ca3909
     if (ret < 0) {
ca3909
         gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
ca3909
@@ -656,6 +639,11 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
ca3909
                                     &frame->root->ctime, stbuf, valid);
ca3909
     }
ca3909
 
ca3909
+    if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
ca3909
+        posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode,
ca3909
+                                    &frame->root->ctime, stbuf, valid);
ca3909
+    }
ca3909
+
ca3909
     if (!valid) {
ca3909
         op_ret = sys_fchown(pfd->fd, -1, -1);
ca3909
         if (op_ret == -1) {
ca3909
@@ -2578,7 +2566,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
ca3909
 
ca3909
     filler = tmp;
ca3909
 
ca3909
-    return posix_handle_pair(filler->this, filler->real_path, k, v,
ca3909
+    return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
ca3909
                              filler->flags, filler->stbuf);
ca3909
 }
ca3909
 
ca3909
@@ -2641,27 +2629,27 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
ca3909
     priv = this->private;
ca3909
     DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
ca3909
 
ca3909
+    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
ca3909
+    if (!real_path) {
ca3909
+        op_ret = -1;
ca3909
+        op_errno = ESTALE;
ca3909
+        goto out;
ca3909
+    }
ca3909
+
ca3909
     ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
ca3909
     if (ret == 0) {
ca3909
         /* This is initiated by lookup when ctime feature is enabled to create
ca3909
          * "trusted.glusterfs.mdata" xattr if not present. These are the files
ca3909
          * which were created when ctime feature is disabled.
ca3909
          */
ca3909
-        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, &mdata_iatt,
ca3909
-                                                 &op_errno);
ca3909
+        ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
ca3909
+                                                 &mdata_iatt, &op_errno);
ca3909
         if (ret != 0) {
ca3909
             op_ret = -1;
ca3909
         }
ca3909
         goto out;
ca3909
     }
ca3909
 
ca3909
-    MAKE_INODE_HANDLE(real_path, this, loc, NULL);
ca3909
-    if (!real_path) {
ca3909
-        op_ret = -1;
ca3909
-        op_errno = ESTALE;
ca3909
-        goto out;
ca3909
-    }
ca3909
-
ca3909
     posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);
ca3909
 
ca3909
     op_ret = -1;
ca3909
@@ -2796,6 +2784,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
ca3909
     filler.real_path = real_path;
ca3909
     filler.this = this;
ca3909
     filler.stbuf = &preop;
ca3909
+    filler.loc = loc;
ca3909
 
ca3909
 #ifdef GF_DARWIN_HOST_OS
ca3909
     filler.flags = map_xattr_flags(flags);
ca3909
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
ca3909
index 532daa2..9efaf99 100644
ca3909
--- a/xlators/storage/posix/src/posix-metadata.c
ca3909
+++ b/xlators/storage/posix/src/posix-metadata.c
ca3909
@@ -56,6 +56,19 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
ca3909
     out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
ca3909
 }
ca3909
 
ca3909
+void
ca3909
+posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in)
ca3909
+{
ca3909
+    /* Fill the atime/mtime/ctime seconds and nanoseconds of *out from *in,
ca3909
+     * passing each on-disk value through be64toh() on the way. */
ca3909
+    out->ia_atime = be64toh(in->atime.tv_sec);
ca3909
+    out->ia_atime_nsec = be64toh(in->atime.tv_nsec);
ca3909
+    out->ia_mtime = be64toh(in->mtime.tv_sec);
ca3909
+    out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec);
ca3909
+    out->ia_ctime = be64toh(in->ctime.tv_sec);
ca3909
+    out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec);
ca3909
+}
ca3909
+
ca3909
 /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
ca3909
 static int
ca3909
 posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd,
ca3909
@@ -341,6 +354,7 @@ posix_compare_timespec(struct timespec *first, struct timespec *second)
ca3909
 
ca3909
 int
ca3909
 posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
ca3909
+                                   const char *realpath,
ca3909
                                    struct mdata_iatt *mdata_iatt, int *op_errno)
ca3909
 {
ca3909
     posix_mdata_t *mdata = NULL;
ca3909
@@ -369,8 +383,8 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
ca3909
                 goto unlock;
ca3909
             }
ca3909
 
ca3909
-            ret = posix_fetch_mdata_xattr(this, NULL, -1, inode, (void *)mdata,
ca3909
-                                          op_errno);
ca3909
+            ret = posix_fetch_mdata_xattr(this, realpath, -1, inode,
ca3909
+                                          (void *)mdata, op_errno);
ca3909
             if (ret == 0) {
ca3909
                 /* Got mdata from disk. This is a race, another client
ca3909
                  * has healed the xattr during lookup. So set it in inode
ca3909
@@ -412,7 +426,7 @@ posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
ca3909
             }
ca3909
         }
ca3909
 
ca3909
-        ret = posix_store_mdata_xattr(this, NULL, -1, inode, mdata);
ca3909
+        ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata);
ca3909
         if (ret) {
ca3909
             gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED,
ca3909
                    "gfid: %s key:%s ", uuid_utoa(inode->gfid),
ca3909
@@ -445,7 +459,8 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd,
ca3909
     GF_VALIDATE_OR_GOTO(this->name, inode, out);
ca3909
     GF_VALIDATE_OR_GOTO(this->name, time, out);
ca3909
 
ca3909
-    if (update_utime && (!u_atime || !u_mtime)) {
ca3909
+    if (update_utime && ((flag->ctime && !time) || (flag->atime && !u_atime) ||
ca3909
+                         (flag->mtime && !u_mtime))) {
ca3909
         goto out;
ca3909
     }
ca3909
 
ca3909
@@ -652,6 +667,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
ca3909
     return;
ca3909
 }
ca3909
 
ca3909
+/* posix_update_ctime_in_mdata writes stbuf->ia_ctime/ia_ctime_nsec into the
ca3909
+ * mdata xattr via posix_set_mdata_xattr() with only flag.ctime set. It does
ca3909
+ * nothing unless inode is non-NULL and priv->ctime is enabled. real_path and
ca3909
+ * fd are both forwarded (posix_fsetattr passes a NULL path with an open fd);
ca3909
+ * the ctime and valid arguments are not read. Failures are only logged.
ca3909
+ */
ca3909
+void
ca3909
+posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
ca3909
+                            inode_t *inode, struct timespec *ctime,
ca3909
+                            struct iatt *stbuf, int valid)
ca3909
+{
ca3909
+    int32_t ret = 0;
ca3909
+#if defined(HAVE_UTIMENSAT)
ca3909
+    struct timespec tv_ctime = {
ca3909
+        0,
ca3909
+    };
ca3909
+#else
ca3909
+    struct timeval tv_ctime = {
ca3909
+        0,
ca3909
+    };
ca3909
+#endif
ca3909
+    posix_mdata_flag_t flag = {
ca3909
+        0,
ca3909
+    };
ca3909
+    struct posix_private *priv = this->private;
ca3909
+
ca3909
+    if (inode && priv->ctime) {
ca3909
+        tv_ctime.tv_sec = stbuf->ia_ctime;
ca3909
+        SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec);
ca3909
+        flag.ctime = 1;
ca3909
+        /* Pass the caller's fd through; posix_fsetattr supplies no path. */
ca3909
+        ret = posix_set_mdata_xattr(this, real_path, fd, inode, &tv_ctime, NULL,
ca3909
+                                    NULL, NULL, &flag, _gf_true);
ca3909
+        if (ret) {
ca3909
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
ca3909
+                   "posix set mdata ctime failed on file:"
ca3909
+                   " %s gfid:%s",
ca3909
+                   real_path, uuid_utoa(inode->gfid));
ca3909
+        }
ca3909
+    }
ca3909
+}
ca3909
+
ca3909
 static void
ca3909
 posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag)
ca3909
 {
ca3909
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
ca3909
index c176699..63e8771 100644
ca3909
--- a/xlators/storage/posix/src/posix-metadata.h
ca3909
+++ b/xlators/storage/posix/src/posix-metadata.h
ca3909
@@ -43,6 +43,10 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd,
ca3909
                             inode_t *inode, struct timespec *ctime,
ca3909
                             struct iatt *stbuf, int valid);
ca3909
 void
ca3909
+posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd,
ca3909
+                            inode_t *inode, struct timespec *ctime,
ca3909
+                            struct iatt *stbuf, int valid);
ca3909
+void
ca3909
 posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path,
ca3909
                 int fd, inode_t *inode, struct iatt *stbuf);
ca3909
 void
ca3909
@@ -56,7 +60,10 @@ posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
ca3909
                     int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
ca3909
 int
ca3909
 posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode,
ca3909
+                                   const char *realpath,
ca3909
                                    struct mdata_iatt *mdata_iatt,
ca3909
                                    int *op_errno);
ca3909
+void
ca3909
+posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in);
ca3909
 
ca3909
 #endif /* _POSIX_METADATA_H */
ca3909
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
ca3909
index 64288a7..dd51062 100644
ca3909
--- a/xlators/storage/posix/src/posix.h
ca3909
+++ b/xlators/storage/posix/src/posix.h
ca3909
@@ -339,7 +339,7 @@ dict_t *
ca3909
 posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd,
ca3909
                  int fdnum, dict_t *xattr, struct iatt *buf);
ca3909
 int
ca3909
-posix_handle_pair(xlator_t *this, const char *real_path, char *key,
ca3909
+posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key,
ca3909
                   data_t *value, int flags, struct iatt *stbuf);
ca3909
 int
ca3909
 posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key,
ca3909
@@ -352,7 +352,8 @@ int
ca3909
 posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc,
ca3909
                 dict_t *xattr_req);
ca3909
 int
ca3909
-posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict);
ca3909
+posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path,
ca3909
+                             dict_t *dict);
ca3909
 
ca3909
 int
ca3909
 posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd,
ca3909
-- 
ca3909
1.8.3.1
ca3909