|
|
74096c |
From 0502383024cbf7e4776816e0a992dccc484a3cf2 Mon Sep 17 00:00:00 2001
|
|
|
74096c |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
74096c |
Date: Tue, 8 Dec 2020 17:23:22 +0530
|
|
|
74096c |
Subject: [PATCH 488/511] glusterd/afr: enable granular-entry-heal by default
|
|
|
74096c |
|
|
|
74096c |
XXXXXXXXXXXXXXXXXXX
|
|
|
74096c |
IMPORTANT:
|
|
|
74096c |
XXXXXXXXXXXXXXXXXXXX
|
|
|
74096c |
I see that for rhgs-3.5.3, GD_OP_VERSION_MAX is GD_OP_VERSION_7_0. Since
|
|
|
74096c |
this patch should only act on new volumes in rhgs-3.5.4, I am bumping
|
|
|
74096c |
the op-version to GD_OP_VERSION_7_1. In glusterfs upstream, the patch
|
|
|
74096c |
acts only if op-version >= GD_OP_VERSION_9_0 as seen in the commit
|
|
|
74096c |
message below.
|
|
|
74096c |
|
|
|
74096c |
Upstream patch details:
|
|
|
74096c |
/------------------------------------------------------------------------------/
|
|
|
74096c |
1. The option has been enabled and tested for quite some time now in RHHI-V
|
|
|
74096c |
downstream and I think it is safe to make it 'on' by default. Since it
|
|
|
74096c |
is not possible to simply change it from 'off' to 'on' without breaking
|
|
|
74096c |
rolling upgrades, old clients etc., I have made it default only for new volumes
|
|
|
74096c |
starting from op-version GD_OP_VERSION_9_0.
|
|
|
74096c |
|
|
|
74096c |
Note: If you do a volume reset, the option will be turned back off.
|
|
|
74096c |
This is okay as the dir's gfid will be captured in 'xattrop' folder and heals
|
|
|
74096c |
will proceed. There might be stale entries inside the 'entry-changes' folder,
|
|
|
74096c |
which will be removed when we enable the option again.
|
|
|
74096c |
|
|
|
74096c |
2. I encountered a cust. issue where entry heal was pending on a dir. with
|
|
|
74096c |
236436 files in it and the glustershd.log output was just stuck at
|
|
|
74096c |
"performing entry selfheal", so I have added logs to give us
|
|
|
74096c |
more info in DEBUG level about whether entry heal and data heal are
|
|
|
74096c |
progressing (metadata heal doesn't take much time). That way, we have a
|
|
|
74096c |
quick visual indication to say things are not 'stuck' if we briefly
|
|
|
74096c |
enable debug logs, instead of taking statedumps or checking profile info
|
|
|
74096c |
etc.
|
|
|
74096c |
|
|
|
74096c |
>Fixes: #1483
|
|
|
74096c |
>Change-Id: I4f116f8c92f8cd33f209b758ff14f3c7e1981422
|
|
|
74096c |
>Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
74096c |
Upstream Patch: https://github.com/gluster/glusterfs/pull/1621
|
|
|
74096c |
/------------------------------------------------------------------------------/
|
|
|
74096c |
|
|
|
74096c |
BUG: 1890506
|
|
|
74096c |
Change-Id: If449a1e873633616cfc508d74b5c22eb434b55ae
|
|
|
74096c |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
74096c |
Reviewed-on: https://code.engineering.redhat.com/gerrit/220555
|
|
|
74096c |
Tested-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
74096c |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
74096c |
---
|
|
|
74096c |
libglusterfs/src/glusterfs/globals.h | 4 +-
|
|
|
74096c |
libglusterfs/src/syncop-utils.c | 4 +-
|
|
|
74096c |
tests/basic/afr/add-brick-self-heal-non-granular.t | 75 +++++++++++++
|
|
|
74096c |
tests/basic/afr/add-brick-self-heal.t | 4 +-
|
|
|
74096c |
tests/basic/afr/bug-1130892-non-granular.t | 77 ++++++++++++++
|
|
|
74096c |
.../basic/afr/bug-1493415-gfid-heal-non-granular.t | 79 ++++++++++++++
|
|
|
74096c |
...507-type-mismatch-error-handling-non-granular.t | 117 +++++++++++++++++++++
|
|
|
74096c |
...1749322-entry-heal-not-happening-non-granular.t | 90 ++++++++++++++++
|
|
|
74096c |
.../afr/replace-brick-self-heal-non-granular.t | 65 ++++++++++++
|
|
|
74096c |
tests/basic/afr/replace-brick-self-heal.t | 2 +-
|
|
|
74096c |
tests/bugs/replicate/bug-1130892.t | 2 +-
|
|
|
74096c |
tests/bugs/replicate/bug-1493415-gfid-heal.t | 2 +-
|
|
|
74096c |
.../bug-1722507-type-mismatch-error-handling.t | 26 +++--
|
|
|
74096c |
.../bug-1749322-entry-heal-not-happening.t | 7 +-
|
|
|
74096c |
xlators/cluster/afr/src/afr-self-heal-common.c | 5 +
|
|
|
74096c |
xlators/cluster/afr/src/afr-self-heal-data.c | 3 +
|
|
|
74096c |
xlators/cluster/afr/src/afr-self-heal-entry.c | 7 +-
|
|
|
74096c |
xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++
|
|
|
74096c |
18 files changed, 558 insertions(+), 24 deletions(-)
|
|
|
74096c |
create mode 100644 tests/basic/afr/add-brick-self-heal-non-granular.t
|
|
|
74096c |
create mode 100644 tests/basic/afr/bug-1130892-non-granular.t
|
|
|
74096c |
create mode 100644 tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
|
|
|
74096c |
create mode 100644 tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
|
|
|
74096c |
create mode 100644 tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
|
|
|
74096c |
create mode 100644 tests/basic/afr/replace-brick-self-heal-non-granular.t
|
|
|
74096c |
|
|
|
74096c |
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
|
|
|
74096c |
index 31717ed..cc145cd 100644
|
|
|
74096c |
--- a/libglusterfs/src/glusterfs/globals.h
|
|
|
74096c |
+++ b/libglusterfs/src/glusterfs/globals.h
|
|
|
74096c |
@@ -50,7 +50,7 @@
|
|
|
74096c |
1 /* MIN is the fresh start op-version, mostly \
|
|
|
74096c |
should not change */
|
|
|
74096c |
#define GD_OP_VERSION_MAX \
|
|
|
74096c |
- GD_OP_VERSION_7_0 /* MAX VERSION is the maximum \
|
|
|
74096c |
+ GD_OP_VERSION_7_1 /* MAX VERSION is the maximum \
|
|
|
74096c |
count in VME table, should \
|
|
|
74096c |
keep changing with \
|
|
|
74096c |
introduction of newer \
|
|
|
74096c |
@@ -138,6 +138,8 @@
|
|
|
74096c |
|
|
|
74096c |
#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
|
|
|
74096c |
|
|
|
74096c |
+#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
|
|
|
74096c |
+
|
|
|
74096c |
#include "glusterfs/xlator.h"
|
|
|
74096c |
#include "glusterfs/options.h"
|
|
|
74096c |
|
|
|
74096c |
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
|
|
|
74096c |
index be03527..2269c76 100644
|
|
|
74096c |
--- a/libglusterfs/src/syncop-utils.c
|
|
|
74096c |
+++ b/libglusterfs/src/syncop-utils.c
|
|
|
74096c |
@@ -495,9 +495,7 @@ syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
|
|
|
74096c |
if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
|
|
|
74096c |
continue;
|
|
|
74096c |
|
|
|
74096c |
- ret = fn(subvol, entry, loc, data);
|
|
|
74096c |
- if (ret)
|
|
|
74096c |
- break;
|
|
|
74096c |
+ ret |= fn(subvol, entry, loc, data);
|
|
|
74096c |
}
|
|
|
74096c |
gf_dirent_free(&entries);
|
|
|
74096c |
if (ret)
|
|
|
74096c |
diff --git a/tests/basic/afr/add-brick-self-heal-non-granular.t b/tests/basic/afr/add-brick-self-heal-non-granular.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..19caf24
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/basic/afr/add-brick-self-heal-non-granular.t
|
|
|
74096c |
@@ -0,0 +1,75 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST glusterd
|
|
|
74096c |
+TEST pidof glusterd
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
|
|
|
74096c |
+EXPECT 'Created' volinfo_field $V0 'Status';
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
|
|
|
74096c |
+TEST $CLI volume start $V0
|
|
|
74096c |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.data-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.entry-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume set $V0 self-heal-daemon off
|
|
|
74096c |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
74096c |
+
|
|
|
74096c |
+# Create files
|
|
|
74096c |
+for i in {1..5}
|
|
|
74096c |
+do
|
|
|
74096c |
+ echo $i > $M0/file$i.txt
|
|
|
74096c |
+done
|
|
|
74096c |
+
|
|
|
74096c |
+# Metadata changes
|
|
|
74096c |
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
|
|
|
74096c |
+
|
|
|
74096c |
+# Add brick1
|
|
|
74096c |
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
|
|
|
74096c |
+
|
|
|
74096c |
+# New-brick should accuse the old-bricks (Simulating case for data-loss)
|
|
|
74096c |
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
|
|
|
74096c |
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
|
|
|
74096c |
+
|
|
|
74096c |
+# Check if pending xattr and dirty-xattr are set for newly-added-brick
|
|
|
74096c |
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
|
|
|
74096c |
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
|
|
|
74096c |
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume set $V0 self-heal-daemon on
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Wait for heal to complete
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Check if entry-heal has happened
|
|
|
74096c |
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
|
|
|
74096c |
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
|
|
|
74096c |
+
|
|
|
74096c |
+# Test if data was healed
|
|
|
74096c |
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
|
|
|
74096c |
+
|
|
|
74096c |
+# Test if metadata was healed and exists on both the bricks
|
|
|
74096c |
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
|
|
|
74096c |
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
|
|
|
74096c |
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
|
|
|
74096c |
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t
|
|
|
74096c |
index c847e22..7ebf4f6 100644
|
|
|
74096c |
--- a/tests/basic/afr/add-brick-self-heal.t
|
|
|
74096c |
+++ b/tests/basic/afr/add-brick-self-heal.t
|
|
|
74096c |
@@ -38,8 +38,8 @@ TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0
|
|
|
74096c |
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
|
|
|
74096c |
|
|
|
74096c |
# Check if pending xattr and dirty-xattr are set for newly-added-brick
|
|
|
74096c |
-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
|
|
|
74096c |
-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
|
|
|
74096c |
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
|
|
|
74096c |
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
|
|
|
74096c |
EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
|
|
|
74096c |
|
|
|
74096c |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
74096c |
diff --git a/tests/basic/afr/bug-1130892-non-granular.t b/tests/basic/afr/bug-1130892-non-granular.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..3cdbc7d
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/basic/afr/bug-1130892-non-granular.t
|
|
|
74096c |
@@ -0,0 +1,77 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+. $(dirname $0)/../../afr.rc
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST glusterd
|
|
|
74096c |
+TEST pidof glusterd
|
|
|
74096c |
+TEST $CLI volume info;
|
|
|
74096c |
+
|
|
|
74096c |
+# Create a 1X2 replica
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
|
|
|
74096c |
+EXPECT 'Created' volinfo_field $V0 'Status';
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
|
|
|
74096c |
+
|
|
|
74096c |
+# Disable self-heal daemon
|
|
|
74096c |
+TEST gluster volume set $V0 self-heal-daemon off
|
|
|
74096c |
+
|
|
|
74096c |
+# Disable client-side heal
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.data-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.entry-self-heal off
|
|
|
74096c |
+
|
|
|
74096c |
+# Disable all perf-xlators
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.quick-read off
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.io-cache off
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.write-behind off
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.stat-prefetch off
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.read-ahead off
|
|
|
74096c |
+
|
|
|
74096c |
+# Volume start
|
|
|
74096c |
+TEST $CLI volume start $V0;
|
|
|
74096c |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
74096c |
+
|
|
|
74096c |
+# FUSE Mount
|
|
|
74096c |
+TEST ${GFS} -s $H0 --volfile-id $V0 $M0
|
|
|
74096c |
+
|
|
|
74096c |
+# Create files and dirs
|
|
|
74096c |
+TEST mkdir -p $M0/one/two/
|
|
|
74096c |
+TEST `echo "Carpe diem" > $M0/one/two/three`
|
|
|
74096c |
+
|
|
|
74096c |
+# Simulate disk-replacement
|
|
|
74096c |
+TEST kill_brick $V0 $H0 $B0/${V0}-1
|
|
|
74096c |
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
|
|
|
74096c |
+TEST rm -rf $B0/${V0}-1/one
|
|
|
74096c |
+TEST rm -rf $B0/${V0}-1/.glusterfs
|
|
|
74096c |
+
|
|
|
74096c |
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
|
|
|
74096c |
+#which will create .glusterfs folder.
|
|
|
74096c |
+mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs
|
|
|
74096c |
+
|
|
|
74096c |
+# Start force
|
|
|
74096c |
+TEST $CLI volume start $V0 force
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
74096c |
+
|
|
|
74096c |
+TEST stat $M0/one
|
|
|
74096c |
+
|
|
|
74096c |
+sleep 1
|
|
|
74096c |
+
|
|
|
74096c |
+# Check pending xattrs
|
|
|
74096c |
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
|
|
|
74096c |
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
|
|
|
74096c |
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
|
|
|
74096c |
+
|
|
|
74096c |
+TEST gluster volume set $V0 self-heal-daemon on
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
diff --git a/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..aff001c
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/basic/afr/bug-1493415-gfid-heal-non-granular.t
|
|
|
74096c |
@@ -0,0 +1,79 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+. $(dirname $0)/../../afr.rc
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST glusterd
|
|
|
74096c |
+TEST pidof glusterd
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
|
|
|
74096c |
+TEST $CLI volume start $V0
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
74096c |
+TEST $CLI volume set $V0 self-heal-daemon off
|
|
|
74096c |
+
|
|
|
74096c |
+# Create base entry in indices/xattrop
|
|
|
74096c |
+echo "Data" > $M0/FILE
|
|
|
74096c |
+
|
|
|
74096c |
+#------------------------------------------------------------------------------#
|
|
|
74096c |
+TEST touch $M0/f1
|
|
|
74096c |
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
|
|
|
74096c |
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
|
|
|
74096c |
+# brick crash at the point where file got created but no xattrs were set.
|
|
|
74096c |
+TEST setfattr -x trusted.gfid $B0/${V0}1/f1
|
|
|
74096c |
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
|
|
|
74096c |
+
|
|
|
74096c |
+# storage/posix considers that a file without gfid changed less than a second
|
|
|
74096c |
+# before doesn't exist, so we need to wait for a second to force posix to
|
|
|
74096c |
+# consider that this is a valid file but without gfid.
|
|
|
74096c |
+sleep 2
|
|
|
74096c |
+
|
|
|
74096c |
+# Assume there were no pending xattrs on parent dir due to 1st brick crashing
|
|
|
74096c |
+# too. Then name heal from client must heal the gfid.
|
|
|
74096c |
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
74096c |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
74096c |
+TEST stat $M0/f1
|
|
|
74096c |
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1
|
|
|
74096c |
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
|
|
|
74096c |
+
|
|
|
74096c |
+#------------------------------------------------------------------------------#
|
|
|
74096c |
+TEST mkdir $M0/dir
|
|
|
74096c |
+TEST touch $M0/dir/f2
|
|
|
74096c |
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2)
|
|
|
74096c |
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
|
|
|
74096c |
+# brick crash at the point where file got created but no xattrs were set.
|
|
|
74096c |
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
|
|
|
74096c |
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
|
|
|
74096c |
+
|
|
|
74096c |
+#Now simulate setting of pending entry xattr on parent dir of 1st brick.
|
|
|
74096c |
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
|
|
|
74096c |
+create_brick_xattrop_entry $B0/${V0}0 dir
|
|
|
74096c |
+
|
|
|
74096c |
+# storage/posix considers that a file without gfid changed less than a second
|
|
|
74096c |
+# before doesn't exist, so we need to wait for a second to force posix to
|
|
|
74096c |
+# consider that this is a valid file but without gfid.
|
|
|
74096c |
+sleep 2
|
|
|
74096c |
+
|
|
|
74096c |
+#Trigger entry-heal via shd
|
|
|
74096c |
+TEST $CLI volume set $V0 self-heal-daemon on
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2
|
|
|
74096c |
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
|
|
|
74096c |
+
|
|
|
74096c |
+#------------------------------------------------------------------------------#
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
diff --git a/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..9079c93
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/basic/afr/bug-1722507-type-mismatch-error-handling-non-granular.t
|
|
|
74096c |
@@ -0,0 +1,117 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+. $(dirname $0)/../../afr.rc
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+## Start and create a volume
|
|
|
74096c |
+TEST glusterd;
|
|
|
74096c |
+TEST pidof glusterd;
|
|
|
74096c |
+TEST $CLI volume info;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
|
|
|
74096c |
+TEST $CLI volume start $V0;
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
74096c |
+TEST $CLI volume heal $V0 disable
|
|
|
74096c |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
74096c |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
|
|
74096c |
+
|
|
|
74096c |
+TEST mkdir $M0/dir
|
|
|
74096c |
+
|
|
|
74096c |
+##########################################################################################
|
|
|
74096c |
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
|
|
|
74096c |
+
|
|
|
74096c |
+TEST touch $M0/dir/file
|
|
|
74096c |
+TEST `echo append>> $M0/dir/file`
|
|
|
74096c |
+
|
|
|
74096c |
+#B0 and B2 must blame B1
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+
|
|
|
74096c |
+# Add entry to xattrop dir to trigger index heal.
|
|
|
74096c |
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
74096c |
+base_entry_b0=`ls $xattrop_dir0`
|
|
|
74096c |
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
74096c |
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
+EXPECT "^1$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove the gfid xattr and the link file on one brick.
|
|
|
74096c |
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
|
|
|
74096c |
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
|
|
|
74096c |
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
|
|
|
74096c |
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
74096c |
+
|
|
|
74096c |
+# Launch heal
|
|
|
74096c |
+TEST $CLI volume heal $V0 enable
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
|
74096c |
+
|
|
|
74096c |
+# Wait for 2 seconds to force posix to consider that this is a valid file but
|
|
|
74096c |
+# without gfid.
|
|
|
74096c |
+sleep 2
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Heal should not fail as the file is missing gfid xattr and the link file,
|
|
|
74096c |
+# which is not actually the gfid or type mismatch.
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
|
|
|
74096c |
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
74096c |
+rm -f $M0/dir/file
|
|
|
74096c |
+
|
|
|
74096c |
+
|
|
|
74096c |
+###########################################################################################
|
|
|
74096c |
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume heal $V0 disable
|
|
|
74096c |
+TEST touch $M0/dir/file
|
|
|
74096c |
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
|
|
|
74096c |
+
|
|
|
74096c |
+#B0 and B2 must blame B1
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+
|
|
|
74096c |
+# Add entry to xattrop dir to trigger index heal.
|
|
|
74096c |
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
74096c |
+base_entry_b0=`ls $xattrop_dir0`
|
|
|
74096c |
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
74096c |
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
+EXPECT "^1$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove the gfid xattr and the link file on two bricks.
|
|
|
74096c |
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
|
|
|
74096c |
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
|
|
|
74096c |
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
|
|
|
74096c |
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
74096c |
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
|
|
|
74096c |
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
74096c |
+
|
|
|
74096c |
+# Launch heal
|
|
|
74096c |
+TEST $CLI volume heal $V0 enable
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
|
74096c |
+
|
|
|
74096c |
+# Wait for 2 seconds to force posix to consider that this is a valid file but
|
|
|
74096c |
+# without gfid.
|
|
|
74096c |
+sleep 2
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Heal should not fail as the file is missing gfid xattr and the link file,
|
|
|
74096c |
+# which is not actually the gfid or type mismatch.
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
|
|
|
74096c |
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
74096c |
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
|
|
|
74096c |
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup
|
|
|
74096c |
diff --git a/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..4f27da4
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/basic/afr/bug-1749322-entry-heal-not-happening-non-granular.t
|
|
|
74096c |
@@ -0,0 +1,90 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+. $(dirname $0)/../../afr.rc
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup
|
|
|
74096c |
+
|
|
|
74096c |
+function check_gfid_and_link_count
|
|
|
74096c |
+{
|
|
|
74096c |
+ local file=$1
|
|
|
74096c |
+
|
|
|
74096c |
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
|
|
|
74096c |
+ TEST [ ! -z $file_gfid_b0 ]
|
|
|
74096c |
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
|
|
|
74096c |
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
|
|
|
74096c |
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
|
|
|
74096c |
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
|
|
|
74096c |
+
|
|
|
74096c |
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
|
|
|
74096c |
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
|
|
|
74096c |
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
|
|
|
74096c |
+}
|
|
|
74096c |
+TESTS_EXPECTED_IN_LOOP=18
|
|
|
74096c |
+
|
|
|
74096c |
+################################################################################
|
|
|
74096c |
+## Start and create a volume
|
|
|
74096c |
+TEST glusterd;
|
|
|
74096c |
+TEST pidof glusterd;
|
|
|
74096c |
+TEST $CLI volume info;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
|
|
|
74096c |
+TEST $CLI volume start $V0;
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
74096c |
+TEST $CLI volume heal $V0 disable
|
|
|
74096c |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
74096c |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
|
|
|
74096c |
+
|
|
|
74096c |
+TEST mkdir $M0/dir
|
|
|
74096c |
+TEST `echo "File 1 " > $M0/dir/file1`
|
|
|
74096c |
+TEST touch $M0/dir/file{2..4}
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove file2 from 1st & 3rd bricks
|
|
|
74096c |
+TEST rm -f $B0/$V0"0"/dir/file2
|
|
|
74096c |
+TEST rm -f $B0/$V0"2"/dir/file2
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
|
|
|
74096c |
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
|
|
|
74096c |
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
|
|
|
74096c |
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
|
|
|
74096c |
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
|
|
|
74096c |
+TEST rm -f $B0/$V0"0"/dir/file3
|
|
|
74096c |
+TEST rm -f $B0/$V0"1"/dir/file3
|
|
|
74096c |
+
|
|
|
74096c |
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
|
|
|
74096c |
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
|
|
|
74096c |
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
|
|
|
74096c |
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
|
|
|
74096c |
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
|
|
|
74096c |
+
|
|
|
74096c |
+# B0 and B2 blame each other
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+
|
|
|
74096c |
+# Add entry to xattrop dir on first brick.
|
|
|
74096c |
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
74096c |
+base_entry_b0=`ls $xattrop_dir0`
|
|
|
74096c |
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
74096c |
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT "^1$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Launch heal
|
|
|
74096c |
+TEST $CLI volume heal $V0 enable
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# All the files must be present on all the bricks after conservative merge and
|
|
|
74096c |
+# should have the gfid xattr and the .glusterfs hardlink.
|
|
|
74096c |
+check_gfid_and_link_count dir/file1
|
|
|
74096c |
+check_gfid_and_link_count dir/file2
|
|
|
74096c |
+check_gfid_and_link_count dir/file3
|
|
|
74096c |
+check_gfid_and_link_count dir/file4
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup
|
|
|
74096c |
diff --git a/tests/basic/afr/replace-brick-self-heal-non-granular.t b/tests/basic/afr/replace-brick-self-heal-non-granular.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..c86bff1
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/basic/afr/replace-brick-self-heal-non-granular.t
|
|
|
74096c |
@@ -0,0 +1,65 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST glusterd
|
|
|
74096c |
+TEST pidof glusterd
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
|
|
|
74096c |
+TEST $CLI volume start $V0
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.data-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.entry-self-heal off
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
74096c |
+TEST $CLI volume set $V0 self-heal-daemon off
|
|
|
74096c |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
|
|
|
74096c |
+
|
|
|
74096c |
+# Create files
|
|
|
74096c |
+for i in {1..5}
|
|
|
74096c |
+do
|
|
|
74096c |
+ echo $i > $M0/file$i.txt
|
|
|
74096c |
+done
|
|
|
74096c |
+
|
|
|
74096c |
+# Metadata changes
|
|
|
74096c |
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
|
|
|
74096c |
+
|
|
|
74096c |
+# Replace brick1
|
|
|
74096c |
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
|
|
|
74096c |
+
|
|
|
74096c |
+# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
|
|
|
74096c |
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
|
|
|
74096c |
+
|
|
|
74096c |
+# Check if pending xattr and dirty-xattr are set for replaced-brick
|
|
|
74096c |
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
|
|
|
74096c |
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
74096c |
+
|
|
|
74096c |
+TEST $CLI volume set $V0 self-heal-daemon on
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
|
|
|
74096c |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Wait for heal to complete
|
|
|
74096c |
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Check if entry-heal has happened
|
|
|
74096c |
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
|
|
|
74096c |
+
|
|
|
74096c |
+# To make sure that files were not lost from brick0
|
|
|
74096c |
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
|
|
|
74096c |
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
|
|
|
74096c |
+
|
|
|
74096c |
+# Test if data was healed
|
|
|
74096c |
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
|
|
|
74096c |
+# To make sure that data was not lost from brick0
|
|
|
74096c |
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
|
|
|
74096c |
+
|
|
|
74096c |
+# Test if metadata was healed and exists on both the bricks
|
|
|
74096c |
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
|
|
|
74096c |
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t
|
|
|
74096c |
index 0360db7..da31c87 100644
|
|
|
74096c |
--- a/tests/basic/afr/replace-brick-self-heal.t
|
|
|
74096c |
+++ b/tests/basic/afr/replace-brick-self-heal.t
|
|
|
74096c |
@@ -30,7 +30,7 @@ TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit forc
|
|
|
74096c |
TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
|
|
|
74096c |
|
|
|
74096c |
# Check if pending xattr and dirty-xattr are set for replaced-brick
|
|
|
74096c |
-EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
|
|
|
74096c |
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
|
|
|
74096c |
EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
|
|
|
74096c |
|
|
|
74096c |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
|
|
74096c |
diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t
|
|
|
74096c |
index 0f57d66..e23eb26 100644
|
|
|
74096c |
--- a/tests/bugs/replicate/bug-1130892.t
|
|
|
74096c |
+++ b/tests/bugs/replicate/bug-1130892.t
|
|
|
74096c |
@@ -56,7 +56,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
|
|
74096c |
TEST stat $M0/one
|
|
|
74096c |
|
|
|
74096c |
# Check pending xattrs
|
|
|
74096c |
-EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
|
|
|
74096c |
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
|
|
|
74096c |
EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
|
|
|
74096c |
EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
|
|
|
74096c |
|
|
|
74096c |
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
|
|
|
74096c |
index 125c35a..9714d5e 100644
|
|
|
74096c |
--- a/tests/bugs/replicate/bug-1493415-gfid-heal.t
|
|
|
74096c |
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
|
|
|
74096c |
@@ -49,7 +49,7 @@ TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
|
|
|
74096c |
TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
|
|
|
74096c |
|
|
|
74096c |
#Now simulate setting of pending entry xattr on parent dir of 1st brick.
|
|
|
74096c |
-TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
|
|
|
74096c |
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/${V0}0/dir
|
|
|
74096c |
create_brick_xattrop_entry $B0/${V0}0 dir
|
|
|
74096c |
|
|
|
74096c |
#Trigger entry-heal via shd
|
|
|
74096c |
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
|
|
|
74096c |
index 0aeaaaf..1fdf7ea 100644
|
|
|
74096c |
--- a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
|
|
|
74096c |
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
|
|
|
74096c |
@@ -23,19 +23,21 @@ TEST mkdir $M0/dir
|
|
|
74096c |
##########################################################################################
|
|
|
74096c |
# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
|
|
|
74096c |
|
|
|
74096c |
-TEST touch $M0/dir/file
|
|
|
74096c |
-#TEST kill_brick $V0 $H0 $B0/$V0"1"
|
|
|
74096c |
+TEST `echo append>> $M0/dir/file`
|
|
|
74096c |
|
|
|
74096c |
#B0 and B2 must blame B1
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
|
|
|
74096c |
+# Another way is to create the needed entries inside indices/entry-changes
|
|
|
74096c |
+# folder.
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
|
|
|
74096c |
# Add entry to xattrop dir to trigger index heal.
|
|
|
74096c |
xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
74096c |
base_entry_b0=`ls $xattrop_dir0`
|
|
|
74096c |
gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
74096c |
-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
EXPECT "^1$" get_pending_heal_count $V0
|
|
|
74096c |
|
|
|
74096c |
# Remove the gfid xattr and the link file on one brick.
|
|
|
74096c |
@@ -70,18 +72,20 @@ rm -f $M0/dir/file
|
|
|
74096c |
|
|
|
74096c |
TEST $CLI volume heal $V0 disable
|
|
|
74096c |
TEST touch $M0/dir/file
|
|
|
74096c |
-#TEST kill_brick $V0 $H0 $B0/$V0"1"
|
|
|
74096c |
|
|
|
74096c |
#B0 and B2 must blame B1
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
|
|
|
74096c |
+# Another way is to create the needed entries inside indices/entry-changes
|
|
|
74096c |
+# folder.
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
|
|
|
74096c |
# Add entry to xattrop dir to trigger index heal.
|
|
|
74096c |
xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
74096c |
base_entry_b0=`ls $xattrop_dir0`
|
|
|
74096c |
gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
|
|
|
74096c |
-ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
|
|
|
74096c |
EXPECT "^1$" get_pending_heal_count $V0
|
|
|
74096c |
|
|
|
74096c |
# Remove the gfid xattr and the link file on two bricks.
|
|
|
74096c |
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
74096c |
index 9627908..3da873a 100644
|
|
|
74096c |
--- a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
74096c |
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
|
|
|
74096c |
@@ -59,8 +59,11 @@ TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_
|
|
|
74096c |
TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
|
|
|
74096c |
|
|
|
74096c |
# B0 and B2 blame each other
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
-setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
+# Set data part of the xattr also to 1 so that local->need_full_crawl is true.
|
|
|
74096c |
+# Another way is to create the needed entries inside indices/entry-changes
|
|
|
74096c |
+# folder.
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000001 $B0/$V0"2"/dir
|
|
|
74096c |
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000001 $B0/$V0"0"/dir
|
|
|
74096c |
|
|
|
74096c |
# Add entry to xattrop dir on first brick.
|
|
|
74096c |
xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
74096c |
index 1608f75..36fd3a9 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
74096c |
@@ -2549,6 +2549,11 @@ afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
|
|
|
74096c |
}
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
+ gf_msg_debug(
|
|
|
74096c |
+ this->name, 0,
|
|
|
74096c |
+ "heals needed for %s: [entry-heal=%d, metadata-heal=%d, data-heal=%d]",
|
|
|
74096c |
+ uuid_utoa(gfid), entry_selfheal, metadata_selfheal, data_selfheal);
|
|
|
74096c |
+
|
|
|
74096c |
if (data_selfheal && priv->data_self_heal)
|
|
|
74096c |
data_ret = afr_selfheal_data(frame, this, fd);
|
|
|
74096c |
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
|
|
|
74096c |
index cdff4a5..b97c66b 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
|
|
|
74096c |
@@ -239,6 +239,9 @@ afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
74096c |
sink_count = AFR_COUNT(healed_sinks, priv->child_count);
|
|
|
74096c |
data_lock = alloca0(priv->child_count);
|
|
|
74096c |
|
|
|
74096c |
+ gf_msg_debug(this->name, 0, "gfid:%s, offset=%jd, size=%zu",
|
|
|
74096c |
+ uuid_utoa(fd->inode->gfid), offset, size);
|
|
|
74096c |
+
|
|
|
74096c |
ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
|
|
|
74096c |
data_lock);
|
|
|
74096c |
{
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
74096c |
index 40be898..00b5b2d 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
|
|
|
74096c |
@@ -206,8 +206,11 @@ __afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
74096c |
replies);
|
|
|
74096c |
} else {
|
|
|
74096c |
if (!gf_uuid_compare(replies[i].poststat.ia_gfid,
|
|
|
74096c |
- replies[source].poststat.ia_gfid))
|
|
|
74096c |
+ replies[source].poststat.ia_gfid)) {
|
|
|
74096c |
+ gf_msg_debug(this->name, 0, "skipping %s, no heal needed.",
|
|
|
74096c |
+ name);
|
|
|
74096c |
continue;
|
|
|
74096c |
+ }
|
|
|
74096c |
|
|
|
74096c |
ret = afr_selfheal_recreate_entry(frame, i, source, sources,
|
|
|
74096c |
fd->inode, name, inode, replies);
|
|
|
74096c |
@@ -839,7 +842,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
|
|
|
74096c |
|
|
|
74096c |
out:
|
|
|
74096c |
loc_wipe(&loc);
|
|
|
74096c |
- return 0;
|
|
|
74096c |
+ return ret;
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
static int
|
|
|
74096c |
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
74096c |
index a72c494..bd17a82 100644
|
|
|
74096c |
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
74096c |
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
|
|
|
74096c |
@@ -13181,6 +13181,19 @@ glusterd_enable_default_options(glusterd_volinfo_t *volinfo, char *option)
|
|
|
74096c |
goto out;
|
|
|
74096c |
}
|
|
|
74096c |
}
|
|
|
74096c |
+
|
|
|
74096c |
+ if ((conf->op_version >= GD_OP_VERSION_7_1) &&
|
|
|
74096c |
+ (volinfo->status == GLUSTERD_STATUS_NONE)) {
|
|
|
74096c |
+ ret = dict_set_dynstr_with_alloc(volinfo->dict,
|
|
|
74096c |
+ "cluster.granular-entry-heal", "on");
|
|
|
74096c |
+ if (ret) {
|
|
|
74096c |
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
|
|
|
74096c |
+ "Failed to set option 'cluster.granular-entry-heal' "
|
|
|
74096c |
+ "on volume %s",
|
|
|
74096c |
+ volinfo->volname);
|
|
|
74096c |
+ goto out;
|
|
|
74096c |
+ }
|
|
|
74096c |
+ }
|
|
|
74096c |
out:
|
|
|
74096c |
return ret;
|
|
|
74096c |
}
|
|
|
74096c |
--
|
|
|
74096c |
1.8.3.1
|
|
|
74096c |
|