|
|
3604df |
From a7f7b267e1d06fde5345c746f75778acd72357b8 Mon Sep 17 00:00:00 2001
|
|
|
3604df |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
3604df |
Date: Wed, 25 Jan 2017 15:31:44 +0530
|
|
|
3604df |
Subject: [PATCH 299/300] cluster/ec: Don't trigger data/metadata heal on
|
|
|
3604df |
Lookups
|
|
|
3604df |
|
|
|
3604df |
Problem-1
|
|
|
3604df |
If Lookup which doesn't take any locks observes version mismatch it can't be
|
|
|
3604df |
trusted. If we launch a heal based on this information it will lead to
|
|
|
3604df |
self-heals which will affect I/O performance in the cases where Lookup is
|
|
|
3604df |
wrong. Considering self-heal-daemon and operations on the inode from client
|
|
|
3604df |
which take locks can still trigger heal we can choose to not attempt a heal on
|
|
|
3604df |
Lookup.
|
|
|
3604df |
|
|
|
3604df |
Problem-2:
|
|
|
3604df |
Fixed spurious failure of
|
|
|
3604df |
tests/bitrot/bug-1373520.t
|
|
|
3604df |
For the issues above, what was happening was that ec_heal_inspect()
|
|
|
3604df |
was preventing 'name' heal from happening
|
|
|
3604df |
|
|
|
3604df |
Problem-3:
|
|
|
3604df |
tests/basic/ec/ec-background-heals.t
|
|
|
3604df |
To be honest, I don't know what the problem was. While fixing
|
|
|
3604df |
the 2 problems above, I made some changes to ec_heal_inspect() and
|
|
|
3604df |
ec_need_heal() after which when I tried to recreate the spurious
|
|
|
3604df |
failure it just didn't happen even after a long time.
|
|
|
3604df |
|
|
|
3604df |
>BUG: 1414287
|
|
|
3604df |
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
3604df |
>Change-Id: Ife2535e1d0b267712973673f6d474e288f3c6834
|
|
|
3604df |
>Reviewed-on: https://review.gluster.org/16468
|
|
|
3604df |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
|
|
|
3604df |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>Reviewed-by: Ashish Pandey <aspandey@redhat.com>
|
|
|
3604df |
|
|
|
3604df |
BUG: 1426559
|
|
|
3604df |
Change-Id: I340b48cd416b07890bf3a5427562f5e3f88a481f
|
|
|
3604df |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
3604df |
Reviewed-on: https://code.engineering.redhat.com/gerrit/98694
|
|
|
3604df |
---
|
|
|
3604df |
libglusterfs/src/cluster-syncop.c | 4 +
|
|
|
3604df |
tests/basic/ec/self-heal.t | 21 ++-
|
|
|
3604df |
tests/bitrot/bug-1373520.t | 4 +-
|
|
|
3604df |
tests/volume.rc | 5 +
|
|
|
3604df |
xlators/cluster/ec/src/ec-common.c | 56 +++++--
|
|
|
3604df |
xlators/cluster/ec/src/ec-common.h | 1 +
|
|
|
3604df |
xlators/cluster/ec/src/ec-heal.c | 321 +++++++++++++++++++++++++++---------
|
|
|
3604df |
xlators/cluster/ec/src/ec-helpers.c | 18 +-
|
|
|
3604df |
xlators/cluster/ec/src/ec-helpers.h | 3 +
|
|
|
3604df |
9 files changed, 327 insertions(+), 106 deletions(-)
|
|
|
3604df |
|
|
|
3604df |
diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c
|
|
|
3604df |
index 10993e6..98a46c8 100644
|
|
|
3604df |
--- a/libglusterfs/src/cluster-syncop.c
|
|
|
3604df |
+++ b/libglusterfs/src/cluster-syncop.c
|
|
|
3604df |
@@ -102,6 +102,10 @@ void
|
|
|
3604df |
cluster_replies_wipe (default_args_cbk_t *replies, int numsubvols)
|
|
|
3604df |
{
|
|
|
3604df |
int i = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!replies)
|
|
|
3604df |
+ return;
|
|
|
3604df |
+
|
|
|
3604df |
for (i = 0; i < numsubvols; i++)
|
|
|
3604df |
args_cbk_wipe (&replies[i]);
|
|
|
3604df |
memset (replies, 0, numsubvols * sizeof (*replies));
|
|
|
3604df |
diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
|
|
|
3604df |
index 98dd923..74cc241 100644
|
|
|
3604df |
--- a/tests/basic/ec/self-heal.t
|
|
|
3604df |
+++ b/tests/basic/ec/self-heal.t
|
|
|
3604df |
@@ -9,6 +9,7 @@ cleanup
|
|
|
3604df |
|
|
|
3604df |
function check_mount_dir
|
|
|
3604df |
{
|
|
|
3604df |
+ getfattr -d -m. -e hex $M0 2>&1 > /dev/null
|
|
|
3604df |
for i in {1..20}; do
|
|
|
3604df |
ls -l $M0/ | grep "dir1"
|
|
|
3604df |
if [ $? -ne 0 ]; then
|
|
|
3604df |
@@ -21,7 +22,7 @@ function check_mount_dir
|
|
|
3604df |
|
|
|
3604df |
function check_size
|
|
|
3604df |
{
|
|
|
3604df |
- stat $M0/$1
|
|
|
3604df |
+ cat $M0/$1 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
res=`stat -c "%s" $i/$1`
|
|
|
3604df |
if [ "$res" != "$2" ]; then
|
|
|
3604df |
@@ -35,7 +36,7 @@ function check_size
|
|
|
3604df |
|
|
|
3604df |
function check_mode
|
|
|
3604df |
{
|
|
|
3604df |
- stat $M0/$1
|
|
|
3604df |
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
res=`stat -c "%A" $i/$1`
|
|
|
3604df |
if [ "$res" != "$2" ]; then
|
|
|
3604df |
@@ -49,7 +50,7 @@ function check_mode
|
|
|
3604df |
|
|
|
3604df |
function check_date
|
|
|
3604df |
{
|
|
|
3604df |
- stat $M0/$1
|
|
|
3604df |
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
res=`stat -c "%Y" $i/$1`
|
|
|
3604df |
if [ "$res" != "$2" ]; then
|
|
|
3604df |
@@ -63,7 +64,7 @@ function check_date
|
|
|
3604df |
|
|
|
3604df |
function check_xattr
|
|
|
3604df |
{
|
|
|
3604df |
- stat $M0/$1
|
|
|
3604df |
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
getfattr -n $2 $i/$1 2>/dev/null
|
|
|
3604df |
if [ $? -eq 0 ]; then
|
|
|
3604df |
@@ -77,7 +78,7 @@ function check_xattr
|
|
|
3604df |
|
|
|
3604df |
function check_dir
|
|
|
3604df |
{
|
|
|
3604df |
- getfattr -m. -d $M0/dir1
|
|
|
3604df |
+ getfattr -m. -d $M0/dir1 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
if [ ! -d $i/dir1 ]; then
|
|
|
3604df |
echo "N"
|
|
|
3604df |
@@ -90,7 +91,7 @@ function check_dir
|
|
|
3604df |
|
|
|
3604df |
function check_soft_link
|
|
|
3604df |
{
|
|
|
3604df |
- stat $M0/test3
|
|
|
3604df |
+ getfattr -d -m. -e hex $M0/test3 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
if [ ! -h $i/test3 ]; then
|
|
|
3604df |
echo "N"
|
|
|
3604df |
@@ -103,7 +104,7 @@ function check_soft_link
|
|
|
3604df |
|
|
|
3604df |
function check_hard_link
|
|
|
3604df |
{
|
|
|
3604df |
- stat $M0/test4
|
|
|
3604df |
+ getfattr -d -m. -e hex $M0/test4 2>&1 > /dev/null
|
|
|
3604df |
for i in "${brick[@]}"; do
|
|
|
3604df |
res=`stat -c "%h" $i/test4`
|
|
|
3604df |
if [ "$res" != "3" ]; then
|
|
|
3604df |
@@ -125,10 +126,14 @@ TESTS_EXPECTED_IN_LOOP=194
|
|
|
3604df |
TEST glusterd
|
|
|
3604df |
TEST pidof glusterd
|
|
|
3604df |
TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
|
|
|
3604df |
+TEST $CLI volume set $V0 client-log-level DEBUG
|
|
|
3604df |
+#Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp'
|
|
|
3604df |
+TEST $CLI volume set $V0 performance.write-behind off
|
|
|
3604df |
EXPECT "Created" volinfo_field $V0 'Status'
|
|
|
3604df |
TEST $CLI volume start $V0
|
|
|
3604df |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
|
|
|
3604df |
-TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
|
|
|
3604df |
+#direct-io-mode is to make sure 'cat' leads to READ fop which triggers heal
|
|
|
3604df |
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --direct-io-mode=yes $M0;
|
|
|
3604df |
# Wait until all 6 childs have been recognized by the ec xlator
|
|
|
3604df |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
|
|
3604df |
|
|
|
3604df |
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
|
|
|
3604df |
index 3a0ac52..115fb27 100644
|
|
|
3604df |
--- a/tests/bitrot/bug-1373520.t
|
|
|
3604df |
+++ b/tests/bitrot/bug-1373520.t
|
|
|
3604df |
@@ -55,9 +55,9 @@ TEST `ls -li $B0/${V0}5/FILE1 | awk '{print $1}' | xargs find $B0/${V0}5/ -inum
|
|
|
3604df |
|
|
|
3604df |
#Access files
|
|
|
3604df |
TEST cat $M0/FILE1
|
|
|
3604df |
-EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/FILE1
|
|
|
3604df |
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/FILE1
|
|
|
3604df |
|
|
|
3604df |
TEST cat $M0/HL_FILE1
|
|
|
3604df |
-EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" stat -c %s $B0/${V0}5/HL_FILE1
|
|
|
3604df |
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/HL_FILE1
|
|
|
3604df |
|
|
|
3604df |
cleanup;
|
|
|
3604df |
diff --git a/tests/volume.rc b/tests/volume.rc
|
|
|
3604df |
index b7fd20a..2eccea4 100644
|
|
|
3604df |
--- a/tests/volume.rc
|
|
|
3604df |
+++ b/tests/volume.rc
|
|
|
3604df |
@@ -470,6 +470,11 @@ function path_exists {
|
|
|
3604df |
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+function path_size {
|
|
|
3604df |
+ local size=$(stat -c %s $1)
|
|
|
3604df |
+ if [ $? -eq 0 ]; then echo $size; else echo ""; fi
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
function force_umount {
|
|
|
3604df |
${UMOUNT_F} $*
|
|
|
3604df |
if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
|
3604df |
index febf508..3064af6 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-common.c
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
|
3604df |
@@ -82,10 +82,50 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
3604df |
return 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+static uintptr_t
|
|
|
3604df |
+ec_fop_needs_name_heal (ec_fop_data_t *fop)
|
|
|
3604df |
+{
|
|
|
3604df |
+ ec_t *ec = NULL;
|
|
|
3604df |
+ ec_cbk_data_t *cbk = NULL;
|
|
|
3604df |
+ ec_cbk_data_t *enoent_cbk = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ ec = fop->xl->private;
|
|
|
3604df |
+ if (fop->id != GF_FOP_LOOKUP)
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!fop->loc[0].name || strlen (fop->loc[0].name) == 0)
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ list_for_each_entry(cbk, &fop->cbk_list, list)
|
|
|
3604df |
+ {
|
|
|
3604df |
+ if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) {
|
|
|
3604df |
+ enoent_cbk = cbk;
|
|
|
3604df |
+ break;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!enoent_cbk)
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ return ec->xl_up & ~enoent_cbk->mask;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
int32_t ec_fop_needs_heal(ec_fop_data_t *fop)
|
|
|
3604df |
{
|
|
|
3604df |
ec_t *ec = fop->xl->private;
|
|
|
3604df |
|
|
|
3604df |
+ if (fop->lock_count == 0) {
|
|
|
3604df |
+ /*
|
|
|
3604df |
+ * if fop->lock_count is zero that means it saw version mismatch
|
|
|
3604df |
+ * without any locks so it can't be trusted. If we launch a heal
|
|
|
3604df |
+ * based on this it will lead to INODELKs which will affect I/O
|
|
|
3604df |
+ * performance. Considering self-heal-daemon and operations on
|
|
|
3604df |
+ * the inode from client which take locks can still trigger the
|
|
|
3604df |
+ * heal we can choose to not attempt a heal when fop->lock_count
|
|
|
3604df |
+ * is zero.
|
|
|
3604df |
+ */
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+ }
|
|
|
3604df |
return (ec->xl_up & ~(fop->remaining | fop->good)) != 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -94,7 +134,7 @@ void ec_check_status(ec_fop_data_t * fop)
|
|
|
3604df |
ec_t * ec = fop->xl->private;
|
|
|
3604df |
int32_t partial = 0;
|
|
|
3604df |
|
|
|
3604df |
- if (!ec_fop_needs_heal(fop)) {
|
|
|
3604df |
+ if (!ec_fop_needs_name_heal (fop) && !ec_fop_needs_heal(fop)) {
|
|
|
3604df |
return;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -107,15 +147,11 @@ void ec_check_status(ec_fop_data_t * fop)
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- if (fop->lock_count > 0) {
|
|
|
3604df |
- gf_msg (fop->xl->name, GF_LOG_WARNING, 0,
|
|
|
3604df |
- EC_MSG_OP_FAIL_ON_SUBVOLS,
|
|
|
3604df |
- "Operation failed on some "
|
|
|
3604df |
- "subvolumes (up=%lX, mask=%lX, "
|
|
|
3604df |
- "remaining=%lX, good=%lX, bad=%lX)",
|
|
|
3604df |
- ec->xl_up, fop->mask, fop->remaining, fop->good,
|
|
|
3604df |
- ec->xl_up & ~(fop->remaining | fop->good));
|
|
|
3604df |
- }
|
|
|
3604df |
+ gf_msg (fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
|
|
|
3604df |
+ "Operation failed on some subvolumes (up=%lX, mask=%lX, "
|
|
|
3604df |
+ "remaining=%lX, good=%lX, bad=%lX)",
|
|
|
3604df |
+ ec->xl_up, fop->mask, fop->remaining, fop->good,
|
|
|
3604df |
+ ec->xl_up & ~(fop->remaining | fop->good));
|
|
|
3604df |
|
|
|
3604df |
if (fop->use_fd)
|
|
|
3604df |
{
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
|
|
|
3604df |
index c532e0e..a03a590 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-common.h
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-common.h
|
|
|
3604df |
@@ -121,6 +121,7 @@ void ec_handle_healers_done (ec_fop_data_t *fop);
|
|
|
3604df |
int32_t
|
|
|
3604df |
ec_heal_inspect (call_frame_t *frame, ec_t *ec,
|
|
|
3604df |
inode_t *inode, unsigned char *locked_on,
|
|
|
3604df |
+ gf_boolean_t self_locked, gf_boolean_t thorough,
|
|
|
3604df |
gf_boolean_t *need_heal);
|
|
|
3604df |
int32_t
|
|
|
3604df |
ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict);
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
|
|
|
3604df |
index 81704e5..02e8123 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-heal.c
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-heal.c
|
|
|
3604df |
@@ -345,7 +345,7 @@ ec_heal_entry_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
if (source == -1)
|
|
|
3604df |
source = i;
|
|
|
3604df |
|
|
|
3604df |
- ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_VERSION,
|
|
|
3604df |
+ ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_VERSION,
|
|
|
3604df |
xattr, EC_VERSION_SIZE);
|
|
|
3604df |
if (ret == 0) {
|
|
|
3604df |
versions[i] = xattr[EC_DATA_TXN];
|
|
|
3604df |
@@ -356,7 +356,7 @@ ec_heal_entry_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
memset (xattr, 0, sizeof(xattr));
|
|
|
3604df |
- ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_DIRTY,
|
|
|
3604df |
+ ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_DIRTY,
|
|
|
3604df |
xattr, EC_VERSION_SIZE);
|
|
|
3604df |
if (ret == 0) {
|
|
|
3604df |
dirty[i] = xattr[EC_DATA_TXN];
|
|
|
3604df |
@@ -453,6 +453,7 @@ out:
|
|
|
3604df |
loc_wipe (&loc);
|
|
|
3604df |
return op_ret;
|
|
|
3604df |
}
|
|
|
3604df |
+
|
|
|
3604df |
int
|
|
|
3604df |
ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
uint64_t *versions, uint64_t *dirty,
|
|
|
3604df |
@@ -479,14 +480,14 @@ ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
continue;
|
|
|
3604df |
if (replies[i].op_ret < 0)
|
|
|
3604df |
continue;
|
|
|
3604df |
- ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_VERSION,
|
|
|
3604df |
+ ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_VERSION,
|
|
|
3604df |
xattr, EC_VERSION_SIZE);
|
|
|
3604df |
if (ret == 0) {
|
|
|
3604df |
versions[i] = xattr[EC_METADATA_TXN];
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
memset (xattr, 0, sizeof (xattr));
|
|
|
3604df |
- ret = ec_dict_del_array (replies[i].xdata, EC_XATTR_DIRTY,
|
|
|
3604df |
+ ret = ec_dict_get_array (replies[i].xdata, EC_XATTR_DIRTY,
|
|
|
3604df |
xattr, EC_VERSION_SIZE);
|
|
|
3604df |
if (ret == 0) {
|
|
|
3604df |
dirty[i] = xattr[EC_METADATA_TXN];
|
|
|
3604df |
@@ -1496,26 +1497,22 @@ unlock:
|
|
|
3604df |
/*Find direction for data heal and heal info*/
|
|
|
3604df |
int
|
|
|
3604df |
ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
- uint64_t *data_versions, uint64_t *meta_versions,
|
|
|
3604df |
+ uint64_t *data_versions,
|
|
|
3604df |
uint64_t *dirty, uint64_t *size, unsigned char *sources,
|
|
|
3604df |
- unsigned char *healed_sinks, int which)
|
|
|
3604df |
+ unsigned char *healed_sinks,
|
|
|
3604df |
+ gf_boolean_t check_ondisksize, int which)
|
|
|
3604df |
{
|
|
|
3604df |
uint64_t xattr[EC_VERSION_SIZE] = {0};
|
|
|
3604df |
char version_size[128] = {0};
|
|
|
3604df |
dict_t *version_size_db = NULL;
|
|
|
3604df |
- uint64_t *m_versions = NULL;
|
|
|
3604df |
unsigned char *same = NULL;
|
|
|
3604df |
int max_same_count = 0;
|
|
|
3604df |
int source = 0;
|
|
|
3604df |
int i = 0;
|
|
|
3604df |
int ret = 0;
|
|
|
3604df |
dict_t *dict = NULL;
|
|
|
3604df |
+ uint64_t source_size = 0;
|
|
|
3604df |
|
|
|
3604df |
- if (!meta_versions) {
|
|
|
3604df |
- m_versions = alloca0 (ec->nodes * sizeof (*m_versions));
|
|
|
3604df |
- } else {
|
|
|
3604df |
- m_versions = meta_versions;
|
|
|
3604df |
- }
|
|
|
3604df |
version_size_db = dict_new ();
|
|
|
3604df |
if (!version_size_db) {
|
|
|
3604df |
ret = -ENOMEM;
|
|
|
3604df |
@@ -1530,17 +1527,14 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
dict = (which == EC_COMBINE_XDATA) ? replies[i].xdata :
|
|
|
3604df |
replies[i].xattr;
|
|
|
3604df |
|
|
|
3604df |
- ret = ec_dict_del_array (dict, EC_XATTR_VERSION,
|
|
|
3604df |
+ ret = ec_dict_get_array (dict, EC_XATTR_VERSION,
|
|
|
3604df |
xattr, EC_VERSION_SIZE);
|
|
|
3604df |
if (ret == 0) {
|
|
|
3604df |
data_versions[i] = xattr[EC_DATA_TXN];
|
|
|
3604df |
- if (meta_versions) {
|
|
|
3604df |
- m_versions[i] = xattr[EC_METADATA_TXN];
|
|
|
3604df |
- }
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
memset (xattr, 0, sizeof (xattr));
|
|
|
3604df |
- ret = ec_dict_del_array (dict, EC_XATTR_DIRTY,
|
|
|
3604df |
+ ret = ec_dict_get_array (dict, EC_XATTR_DIRTY,
|
|
|
3604df |
xattr, EC_VERSION_SIZE);
|
|
|
3604df |
if (ret == 0) {
|
|
|
3604df |
dirty[i] = xattr[EC_DATA_TXN];
|
|
|
3604df |
@@ -1549,8 +1543,7 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
&size[i]);
|
|
|
3604df |
/*Build a db of same metadata and data version and size*/
|
|
|
3604df |
snprintf (version_size, sizeof (version_size),
|
|
|
3604df |
- "%"PRIu64"-%"PRIu64"-%"PRIu64, data_versions[i],
|
|
|
3604df |
- m_versions[i], size[i]);
|
|
|
3604df |
+ "%"PRIu64"-%"PRIu64, data_versions[i], size[i]);
|
|
|
3604df |
|
|
|
3604df |
ret = dict_get_bin (version_size_db, version_size,
|
|
|
3604df |
(void **)&same);
|
|
|
3604df |
@@ -1581,9 +1574,7 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
goto out;
|
|
|
3604df |
} else {
|
|
|
3604df |
snprintf (version_size, sizeof (version_size),
|
|
|
3604df |
- "%"PRIu64"-%"PRIu64"-%"PRIu64,
|
|
|
3604df |
- data_versions[source],
|
|
|
3604df |
- m_versions[source],
|
|
|
3604df |
+ "%"PRIu64"-%"PRIu64, data_versions[source],
|
|
|
3604df |
size[source]);
|
|
|
3604df |
|
|
|
3604df |
ret = dict_get_bin (version_size_db, version_size,
|
|
|
3604df |
@@ -1598,6 +1589,30 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+ /* There could be files with versions, size same but on disk ia_size
|
|
|
3604df |
+ * could be different because of disk crashes, mark them as sinks as
|
|
|
3604df |
+ * well*/
|
|
|
3604df |
+
|
|
|
3604df |
+ if (check_ondisksize) {
|
|
|
3604df |
+ source_size = ec_adjust_size (ec, size[source], 1);
|
|
|
3604df |
+
|
|
|
3604df |
+ for (i = 0; i < ec->nodes; i++) {
|
|
|
3604df |
+ if (sources[i]) {
|
|
|
3604df |
+ if (replies[i].stat.ia_size != source_size) {
|
|
|
3604df |
+ sources[i] = 0;
|
|
|
3604df |
+ healed_sinks[i] = 1;
|
|
|
3604df |
+ max_same_count--;
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ source = i;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ if (max_same_count < ec->fragments) {
|
|
|
3604df |
+ ret = -EIO;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
ret = source;
|
|
|
3604df |
out:
|
|
|
3604df |
if (version_size_db)
|
|
|
3604df |
@@ -1613,17 +1628,20 @@ __ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd,
|
|
|
3604df |
struct iatt *stbuf)
|
|
|
3604df |
{
|
|
|
3604df |
default_args_cbk_t *replies = NULL;
|
|
|
3604df |
+ default_args_cbk_t *fstat_replies = NULL;
|
|
|
3604df |
unsigned char *output = NULL;
|
|
|
3604df |
+ unsigned char *fstat_output = NULL;
|
|
|
3604df |
dict_t *xattrs = NULL;
|
|
|
3604df |
uint64_t zero_array[2] = {0};
|
|
|
3604df |
int source = 0;
|
|
|
3604df |
int ret = 0;
|
|
|
3604df |
uint64_t zero_value = 0;
|
|
|
3604df |
- uint64_t source_size = 0;
|
|
|
3604df |
int i = 0;
|
|
|
3604df |
|
|
|
3604df |
EC_REPLIES_ALLOC (replies, ec->nodes);
|
|
|
3604df |
+ EC_REPLIES_ALLOC (fstat_replies, ec->nodes);
|
|
|
3604df |
output = alloca0(ec->nodes);
|
|
|
3604df |
+ fstat_output = alloca0(ec->nodes);
|
|
|
3604df |
xattrs = dict_new ();
|
|
|
3604df |
if (!xattrs ||
|
|
|
3604df |
dict_set_static_bin (xattrs, EC_XATTR_VERSION, zero_array,
|
|
|
3604df |
@@ -1639,43 +1657,34 @@ __ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd,
|
|
|
3604df |
ret = cluster_fxattrop (ec->xl_list, locked_on, ec->nodes,
|
|
|
3604df |
replies, output, frame, ec->xl, fd,
|
|
|
3604df |
GF_XATTROP_ADD_ARRAY64, xattrs, NULL);
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = cluster_fstat (ec->xl_list, locked_on, ec->nodes, fstat_replies,
|
|
|
3604df |
+ fstat_output, frame, ec->xl, fd, NULL);
|
|
|
3604df |
+
|
|
|
3604df |
+ for (i = 0; i < ec->nodes; i++) {
|
|
|
3604df |
+ output[i] = output[i] && fstat_output[i];
|
|
|
3604df |
+ replies[i].valid = output[i];
|
|
|
3604df |
+ if (output[i])
|
|
|
3604df |
+ replies[i].stat = fstat_replies[i].stat;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
if (EC_COUNT (output, ec->nodes) <= ec->fragments) {
|
|
|
3604df |
ret = -ENOTCONN;
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- source = ec_heal_data_find_direction (ec, replies, versions, NULL,
|
|
|
3604df |
+ source = ec_heal_data_find_direction (ec, replies, versions,
|
|
|
3604df |
dirty, size, sources,
|
|
|
3604df |
- healed_sinks, EC_COMBINE_DICT);
|
|
|
3604df |
+ healed_sinks, _gf_true,
|
|
|
3604df |
+ EC_COMBINE_DICT);
|
|
|
3604df |
ret = source;
|
|
|
3604df |
if (ret < 0)
|
|
|
3604df |
goto out;
|
|
|
3604df |
|
|
|
3604df |
- /* There could be files with versions, size same but on disk ia_size
|
|
|
3604df |
- * could be different because of disk crashes, mark them as sinks as
|
|
|
3604df |
- * well*/
|
|
|
3604df |
- ret = cluster_fstat (ec->xl_list, locked_on, ec->nodes, replies,
|
|
|
3604df |
- output, frame, ec->xl, fd, NULL);
|
|
|
3604df |
- EC_INTERSECT (sources, sources, output, ec->nodes);
|
|
|
3604df |
- EC_INTERSECT (healed_sinks, healed_sinks, output, ec->nodes);
|
|
|
3604df |
- if (EC_COUNT (sources, ec->nodes) < ec->fragments) {
|
|
|
3604df |
- ret = -ENOTCONN;
|
|
|
3604df |
- goto out;
|
|
|
3604df |
- }
|
|
|
3604df |
-
|
|
|
3604df |
- source_size = ec_adjust_size (ec, size[source], 1);
|
|
|
3604df |
+ if (stbuf)
|
|
|
3604df |
+ *stbuf = replies[source].stat;
|
|
|
3604df |
|
|
|
3604df |
for (i = 0; i < ec->nodes; i++) {
|
|
|
3604df |
- if (sources[i]) {
|
|
|
3604df |
- if (replies[i].stat.ia_size != source_size) {
|
|
|
3604df |
- sources[i] = 0;
|
|
|
3604df |
- healed_sinks[i] = 1;
|
|
|
3604df |
- } else if (stbuf) {
|
|
|
3604df |
- source = i;
|
|
|
3604df |
- *stbuf = replies[i].stat;
|
|
|
3604df |
- }
|
|
|
3604df |
- }
|
|
|
3604df |
-
|
|
|
3604df |
if (healed_sinks[i]) {
|
|
|
3604df |
if (replies[i].stat.ia_size)
|
|
|
3604df |
trim[i] = 1;
|
|
|
3604df |
@@ -1692,6 +1701,7 @@ out:
|
|
|
3604df |
if (xattrs)
|
|
|
3604df |
dict_unref (xattrs);
|
|
|
3604df |
cluster_replies_wipe (replies, ec->nodes);
|
|
|
3604df |
+ cluster_replies_wipe (fstat_replies, ec->nodes);
|
|
|
3604df |
if (ret < 0) {
|
|
|
3604df |
gf_msg_debug (ec->xl->name, 0, "%s: heal failed %s",
|
|
|
3604df |
uuid_utoa (fd->inode->gfid), strerror (-ret));
|
|
|
3604df |
@@ -2345,7 +2355,7 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
|
|
|
3604df |
|
|
|
3604df |
frame = create_frame (this, this->ctx->pool);
|
|
|
3604df |
if (!frame)
|
|
|
3604df |
- return;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
|
|
|
3604df |
ec_owner_set(frame, frame->root);
|
|
|
3604df |
/*Do heal as root*/
|
|
|
3604df |
@@ -2359,15 +2369,6 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
|
|
|
3604df |
up_subvols = alloca0(ec->nodes);
|
|
|
3604df |
ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes);
|
|
|
3604df |
|
|
|
3604df |
- ec_heal_inspect (frame, ec, loc->inode, up_subvols,
|
|
|
3604df |
- &need_heal);
|
|
|
3604df |
- if (!need_heal) {
|
|
|
3604df |
- gf_msg (ec->xl->name, GF_LOG_DEBUG, 0,
|
|
|
3604df |
- EC_MSG_HEAL_FAIL, "Heal is not required for : %s ",
|
|
|
3604df |
- uuid_utoa(loc->gfid));
|
|
|
3604df |
- goto out;
|
|
|
3604df |
- }
|
|
|
3604df |
-
|
|
|
3604df |
if (loc->name && strlen (loc->name)) {
|
|
|
3604df |
ret = ec_heal_name (frame, ec, loc->parent, (char *)loc->name,
|
|
|
3604df |
participants);
|
|
|
3604df |
@@ -2384,6 +2385,17 @@ ec_heal_do (xlator_t *this, void *data, loc_t *loc, int32_t partial)
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+ /* Mount triggers heal only when it detects that it must need heal, shd
|
|
|
3604df |
+ * triggers heals periodically which need not be thorough*/
|
|
|
3604df |
+ ec_heal_inspect (frame, ec, loc->inode, up_subvols, _gf_false,
|
|
|
3604df |
+ !ec->shd.iamshd, &need_heal);
|
|
|
3604df |
+ if (!need_heal) {
|
|
|
3604df |
+ gf_msg (ec->xl->name, GF_LOG_DEBUG, 0,
|
|
|
3604df |
+ EC_MSG_HEAL_FAIL, "Heal is not required for : %s ",
|
|
|
3604df |
+ uuid_utoa(loc->gfid));
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
msources = alloca0(ec->nodes);
|
|
|
3604df |
mhealed_sinks = alloca0(ec->nodes);
|
|
|
3604df |
ret = ec_heal_metadata (frame, ec, loc->inode, msources, mhealed_sinks);
|
|
|
3604df |
@@ -2423,7 +2435,8 @@ out:
|
|
|
3604df |
ec->nodes),
|
|
|
3604df |
mgood & good, mbad & bad, NULL);
|
|
|
3604df |
}
|
|
|
3604df |
- STACK_DESTROY (frame->root);
|
|
|
3604df |
+ if (frame)
|
|
|
3604df |
+ STACK_DESTROY (frame->root);
|
|
|
3604df |
return;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -2679,40 +2692,170 @@ out:
|
|
|
3604df |
return ret;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
-int32_t
|
|
|
3604df |
-ec_need_heal (ec_t *ec, default_args_cbk_t *replies,
|
|
|
3604df |
- gf_boolean_t *need_heal, int32_t lock_count)
|
|
|
3604df |
+static int32_t
|
|
|
3604df |
+_need_heal_calculate (ec_t *ec, uint64_t *dirty, unsigned char *sources,
|
|
|
3604df |
+ gf_boolean_t self_locked, int32_t lock_count,
|
|
|
3604df |
+ gf_boolean_t *need_heal)
|
|
|
3604df |
+{
|
|
|
3604df |
+ int i = 0;
|
|
|
3604df |
+ int source_count = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ source_count = EC_COUNT (sources, ec->nodes);
|
|
|
3604df |
+ if (source_count == ec->nodes) {
|
|
|
3604df |
+ *need_heal = _gf_false;
|
|
|
3604df |
+ if (self_locked || lock_count == 0) {
|
|
|
3604df |
+ for (i = 0; i < ec->nodes; i++) {
|
|
|
3604df |
+ if (dirty[i]) {
|
|
|
3604df |
+ *need_heal = _gf_true;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ for (i = 0; i < ec->nodes; i++) {
|
|
|
3604df |
+ /* Since each lock can only increment the dirty
|
|
|
3604df |
+ * count once, if dirty is > 1 it means that
|
|
|
3604df |
+ * another operation has left the dirty count
|
|
|
3604df |
+ * set and this indicates a problem in the
|
|
|
3604df |
+ * inode.*/
|
|
|
3604df |
+ if (dirty[i] > 1) {
|
|
|
3604df |
+ *need_heal = _gf_true;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ *need_heal = _gf_true;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+out:
|
|
|
3604df |
+ return source_count;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+static int32_t
|
|
|
3604df |
+ec_need_metadata_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
|
|
|
3604df |
+ int32_t lock_count, gf_boolean_t self_locked,
|
|
|
3604df |
+ gf_boolean_t thorough, gf_boolean_t *need_heal)
|
|
|
3604df |
{
|
|
|
3604df |
uint64_t *dirty = NULL;
|
|
|
3604df |
unsigned char *sources = NULL;
|
|
|
3604df |
unsigned char *healed_sinks = NULL;
|
|
|
3604df |
- uint64_t *data_versions = NULL;
|
|
|
3604df |
uint64_t *meta_versions = NULL;
|
|
|
3604df |
+ int ret = 0;
|
|
|
3604df |
+ int i = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ sources = alloca0(ec->nodes);
|
|
|
3604df |
+ healed_sinks = alloca0(ec->nodes);
|
|
|
3604df |
+ dirty = alloca0 (ec->nodes * sizeof (*dirty));
|
|
|
3604df |
+ meta_versions = alloca0 (ec->nodes * sizeof (*meta_versions));
|
|
|
3604df |
+ ret = ec_heal_metadata_find_direction (ec, replies, meta_versions,
|
|
|
3604df |
+ dirty, sources, healed_sinks);
|
|
|
3604df |
+ if (ret < 0 && ret != -EIO) {
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count,
|
|
|
3604df |
+ need_heal);
|
|
|
3604df |
+ if (ret == ec->nodes && !(*need_heal)) {
|
|
|
3604df |
+ for (i = 1; i < ec->nodes; i++) {
|
|
|
3604df |
+ if (meta_versions[i] != meta_versions[0]) {
|
|
|
3604df |
+ *need_heal = _gf_true;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+out:
|
|
|
3604df |
+ return ret;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+static int32_t
|
|
|
3604df |
+ec_need_data_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
|
|
|
3604df |
+ int32_t lock_count, gf_boolean_t self_locked,
|
|
|
3604df |
+ gf_boolean_t thorough, gf_boolean_t *need_heal)
|
|
|
3604df |
+{
|
|
|
3604df |
+ uint64_t *dirty = NULL;
|
|
|
3604df |
+ unsigned char *sources = NULL;
|
|
|
3604df |
+ unsigned char *healed_sinks = NULL;
|
|
|
3604df |
+ uint64_t *data_versions = NULL;
|
|
|
3604df |
uint64_t *size = NULL;
|
|
|
3604df |
int ret = 0;
|
|
|
3604df |
- int source_count = 0;
|
|
|
3604df |
|
|
|
3604df |
sources = alloca0(ec->nodes);
|
|
|
3604df |
healed_sinks = alloca0(ec->nodes);
|
|
|
3604df |
dirty = alloca0 (ec->nodes * sizeof (*dirty));
|
|
|
3604df |
- size = alloca0 (ec->nodes * sizeof (*size));
|
|
|
3604df |
data_versions = alloca0 (ec->nodes * sizeof (*data_versions));
|
|
|
3604df |
- meta_versions = alloca0 (ec->nodes * sizeof (*meta_versions));
|
|
|
3604df |
+ size = alloca0 (ec->nodes * sizeof (*size));
|
|
|
3604df |
|
|
|
3604df |
+ /* When dd is going on and heal info is called there is a very good
|
|
|
3604df |
+ * chance for on disk sizes to mismatch even though nothing is wrong;
|
|
|
3604df |
+ * we don't need on-disk size check there. But if the file is either
|
|
|
3604df |
+ * self-locked or the caller wants a thorough check then make sure to
|
|
|
3604df |
+ * perform on disk check also. */
|
|
|
3604df |
ret = ec_heal_data_find_direction (ec, replies, data_versions,
|
|
|
3604df |
- meta_versions, dirty, size,
|
|
|
3604df |
- sources, healed_sinks,
|
|
|
3604df |
+ dirty, size, sources, healed_sinks,
|
|
|
3604df |
+ self_locked || thorough,
|
|
|
3604df |
EC_COMBINE_XDATA);
|
|
|
3604df |
if (ret < 0 && ret != -EIO) {
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
- source_count = EC_COUNT (sources, ec->nodes);
|
|
|
3604df |
- if (source_count == ec->nodes && lock_count > 0) {
|
|
|
3604df |
- *need_heal = _gf_false;
|
|
|
3604df |
- } else {
|
|
|
3604df |
- *need_heal = _gf_true;
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count,
|
|
|
3604df |
+ need_heal);
|
|
|
3604df |
+out:
|
|
|
3604df |
+ return ret;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+static int32_t
|
|
|
3604df |
+ec_need_entry_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
|
|
|
3604df |
+ int32_t lock_count, gf_boolean_t self_locked,
|
|
|
3604df |
+ gf_boolean_t thorough, gf_boolean_t *need_heal)
|
|
|
3604df |
+{
|
|
|
3604df |
+ uint64_t *dirty = NULL;
|
|
|
3604df |
+ unsigned char *sources = NULL;
|
|
|
3604df |
+ unsigned char *healed_sinks = NULL;
|
|
|
3604df |
+ uint64_t *data_versions = NULL;
|
|
|
3604df |
+ int ret = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ sources = alloca0(ec->nodes);
|
|
|
3604df |
+ healed_sinks = alloca0(ec->nodes);
|
|
|
3604df |
+ dirty = alloca0 (ec->nodes * sizeof (*dirty));
|
|
|
3604df |
+ data_versions = alloca0 (ec->nodes * sizeof (*data_versions));
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = ec_heal_entry_find_direction (ec, replies, data_versions,
|
|
|
3604df |
+ dirty, sources, healed_sinks);
|
|
|
3604df |
+ if (ret < 0 && ret != -EIO) {
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = _need_heal_calculate (ec, dirty, sources, self_locked, lock_count,
|
|
|
3604df |
+ need_heal);
|
|
|
3604df |
+out:
|
|
|
3604df |
+ return ret;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+static int32_t
|
|
|
3604df |
+ec_need_heal (ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
|
|
|
3604df |
+ int32_t lock_count, gf_boolean_t self_locked,
|
|
|
3604df |
+ gf_boolean_t thorough, gf_boolean_t *need_heal)
|
|
|
3604df |
+{
|
|
|
3604df |
+ int ret = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = ec_need_metadata_heal (ec, inode, replies, lock_count,
|
|
|
3604df |
+ self_locked, thorough, need_heal);
|
|
|
3604df |
+ if (ret < 0)
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (*need_heal)
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (inode->ia_type == IA_IFREG) {
|
|
|
3604df |
+ ret = ec_need_data_heal (ec, inode, replies, lock_count,
|
|
|
3604df |
+ self_locked, thorough, need_heal);
|
|
|
3604df |
+ } else if (inode->ia_type == IA_IFDIR) {
|
|
|
3604df |
+ ret = ec_need_entry_heal (ec, inode, replies, lock_count,
|
|
|
3604df |
+ self_locked, thorough, need_heal);
|
|
|
3604df |
}
|
|
|
3604df |
- ret = source_count;
|
|
|
3604df |
+
|
|
|
3604df |
out:
|
|
|
3604df |
return ret;
|
|
|
3604df |
}
|
|
|
3604df |
@@ -2720,6 +2863,7 @@ out:
|
|
|
3604df |
int32_t
|
|
|
3604df |
ec_heal_inspect (call_frame_t *frame, ec_t *ec,
|
|
|
3604df |
inode_t *inode, unsigned char *locked_on,
|
|
|
3604df |
+ gf_boolean_t self_locked, gf_boolean_t thorough,
|
|
|
3604df |
gf_boolean_t *need_heal)
|
|
|
3604df |
{
|
|
|
3604df |
loc_t loc = {0};
|
|
|
3604df |
@@ -2740,8 +2884,6 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
|
|
|
3604df |
|
|
|
3604df |
xdata = dict_new ();
|
|
|
3604df |
if (!xdata ||
|
|
|
3604df |
- dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT,
|
|
|
3604df |
- ec->xl->name) ||
|
|
|
3604df |
dict_set_static_bin (xdata, EC_XATTR_VERSION, zero_array,
|
|
|
3604df |
sizeof (zero_array)) ||
|
|
|
3604df |
dict_set_static_bin (xdata, EC_XATTR_DIRTY, zero_array,
|
|
|
3604df |
@@ -2751,6 +2893,16 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
|
|
|
3604df |
ret = -ENOMEM;
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!self_locked) {
|
|
|
3604df |
+ ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT,
|
|
|
3604df |
+ ec->xl->name);
|
|
|
3604df |
+ if (ret) {
|
|
|
3604df |
+ ret = -ENOMEM;
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies,
|
|
|
3604df |
output, frame, ec->xl, &loc, xdata);
|
|
|
3604df |
|
|
|
3604df |
@@ -2760,6 +2912,9 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+ if (self_locked)
|
|
|
3604df |
+ goto need_heal;
|
|
|
3604df |
+
|
|
|
3604df |
for (i = 0; i < ec->nodes; i++) {
|
|
|
3604df |
if (!output[i] || !replies[i].xdata) {
|
|
|
3604df |
continue;
|
|
|
3604df |
@@ -2769,7 +2924,9 @@ ec_heal_inspect (call_frame_t *frame, ec_t *ec,
|
|
|
3604df |
break;
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
- ret = ec_need_heal (ec, replies, need_heal, lock_count);
|
|
|
3604df |
+need_heal:
|
|
|
3604df |
+ ret = ec_need_heal (ec, inode, replies, lock_count,
|
|
|
3604df |
+ self_locked, thorough, need_heal);
|
|
|
3604df |
|
|
|
3604df |
out:
|
|
|
3604df |
cluster_replies_wipe (replies, ec->nodes);
|
|
|
3604df |
@@ -2803,8 +2960,8 @@ ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode,
|
|
|
3604df |
*need_heal = _gf_true;
|
|
|
3604df |
goto unlock;
|
|
|
3604df |
}
|
|
|
3604df |
- ret = ec_heal_inspect (frame, ec, inode,
|
|
|
3604df |
- locked_on, need_heal);
|
|
|
3604df |
+ ret = ec_heal_inspect (frame, ec, inode, locked_on, _gf_true, _gf_true,
|
|
|
3604df |
+ need_heal);
|
|
|
3604df |
unlock:
|
|
|
3604df |
cluster_uninodelk (ec->xl_list, locked_on, ec->nodes,
|
|
|
3604df |
replies, output, frame, ec->xl,
|
|
|
3604df |
@@ -2852,9 +3009,9 @@ ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols,
|
|
|
3604df |
- &need_heal);
|
|
|
3604df |
- if (ret == ec->nodes) {
|
|
|
3604df |
+ ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols, _gf_false,
|
|
|
3604df |
+ _gf_false, &need_heal);
|
|
|
3604df |
+ if (ret == ec->nodes && !need_heal) {
|
|
|
3604df |
goto set_heal;
|
|
|
3604df |
}
|
|
|
3604df |
need_heal = _gf_false;
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
|
|
|
3604df |
index 7df8312..966639d 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-helpers.c
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-helpers.c
|
|
|
3604df |
@@ -162,8 +162,8 @@ int32_t ec_dict_set_array(dict_t *dict, char *key, uint64_t value[],
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
|
|
|
3604df |
-int32_t ec_dict_del_array(dict_t *dict, char *key, uint64_t value[],
|
|
|
3604df |
- int32_t size)
|
|
|
3604df |
+int32_t
|
|
|
3604df |
+ec_dict_get_array (dict_t *dict, char *key, uint64_t value[], int32_t size)
|
|
|
3604df |
{
|
|
|
3604df |
void *ptr;
|
|
|
3604df |
int32_t len;
|
|
|
3604df |
@@ -197,11 +197,21 @@ int32_t ec_dict_del_array(dict_t *dict, char *key, uint64_t value[],
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- dict_del(dict, key);
|
|
|
3604df |
-
|
|
|
3604df |
return 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+int32_t
|
|
|
3604df |
+ec_dict_del_array (dict_t *dict, char *key, uint64_t value[], int32_t size)
|
|
|
3604df |
+{
|
|
|
3604df |
+ int ret = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ ret = ec_dict_get_array (dict, key, value, size);
|
|
|
3604df |
+ if (ret == 0)
|
|
|
3604df |
+ dict_del(dict, key);
|
|
|
3604df |
+
|
|
|
3604df |
+ return ret;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
|
|
|
3604df |
int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value)
|
|
|
3604df |
{
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
|
|
|
3604df |
index 93d7772..39c67ba 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-helpers.h
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-helpers.h
|
|
|
3604df |
@@ -22,6 +22,9 @@ size_t ec_iov_copy_to(void * dst, struct iovec * vector, int32_t count,
|
|
|
3604df |
|
|
|
3604df |
int32_t ec_dict_set_array(dict_t *dict, char *key,
|
|
|
3604df |
uint64_t *value, int32_t size);
|
|
|
3604df |
+int32_t ec_dict_get_array (dict_t *dict, char *key, uint64_t value[],
|
|
|
3604df |
+ int32_t size);
|
|
|
3604df |
+
|
|
|
3604df |
int32_t ec_dict_del_array(dict_t *dict, char *key,
|
|
|
3604df |
uint64_t *value, int32_t size);
|
|
|
3604df |
int32_t ec_dict_set_number(dict_t * dict, char * key, uint64_t value);
|
|
|
3604df |
--
|
|
|
3604df |
2.9.3
|
|
|
3604df |
|