From 0d2dd0b1395397fced99b62dd826d4ab18fb94f1 Mon Sep 17 00:00:00 2001 From: Xavier Hernandez Date: Fri, 7 Aug 2015 12:37:52 +0200 Subject: [PATCH 318/320] cluster/ec: Fix write size in self-heal Self-heal was always using a fixed block size to heal a file. This was incorrect for dispersed volumes whose number of data bricks is not a power of 2. This patch adjusts the block size to a multiple of the stripe size of the volume. It also propagates errors detected during the data heal so that healing of the file stops and the file is not marked as healed. > Change-Id: I9ee3fde98a9e5d6116fd096ceef88686fd1d28e2 > BUG: 1251446 > Signed-off-by: Xavier Hernandez > Reviewed-on: http://review.gluster.org/11862 > Tested-by: NetBSD Build System > Tested-by: Gluster Build System > Reviewed-by: Pranith Kumar Karampuri 3.7: http://review.gluster.org/11869 BUG: 1241862 Change-Id: I3bbe6ed6ff60b3efcc08d7425678bb9aeb5ddb11 Signed-off-by: Xavier Hernandez Reviewed-on: https://code.engineering.redhat.com/gerrit/56691 Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri --- tests/bugs/disperse/bug-1251446.t | 50 +++++++++++++++++++++++++++++++++++++ xlators/cluster/ec/src/ec-data.h | 1 + xlators/cluster/ec/src/ec-heal.c | 9 ++++++ 3 files changed, 60 insertions(+), 0 deletions(-) create mode 100644 tests/bugs/disperse/bug-1251446.t diff --git a/tests/bugs/disperse/bug-1251446.t b/tests/bugs/disperse/bug-1251446.t new file mode 100644 index 0000000..f805539 --- /dev/null +++ b/tests/bugs/disperse/bug-1251446.t @@ -0,0 +1,50 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 4 redundancy 1 $H0:$B0/${V0}{0..3} +TEST $CLI volume start $V0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 + +TEST dd if=/dev/urandom of=$M0/test1 bs=1024k count=2 +cs=$(sha1sum $M0/test1 | awk '{ print $1 }') + +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT '3' online_brick_count + +TEST cp $M0/test1 $M0/test2 +EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }') + +TEST $CLI volume start $V0 force +EXPECT '4' online_brick_count + +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid +TEST $CLI volume heal $V0 full +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + +EXPECT "699392" stat -c "%s" $B0/${V0}0/test2 + +# force cache clear +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume stop $V0 +TEST $CLI volume start $V0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0 + +TEST kill_brick $V0 $H0 $B0/${V0}3 +EXPECT '3' online_brick_count + +EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }') + +## cleanup +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup; diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h index ec470e9..1008706 100644 --- a/xlators/cluster/ec/src/ec-data.h +++ b/xlators/cluster/ec/src/ec-data.h @@ -285,6 +285,7 @@ struct _ec_heal fd_t *fd; int32_t partial; int32_t done; + int32_t error; gf_boolean_t nameheal; uintptr_t available; uintptr_t good; diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index a7c97a5..fde7e31 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -1779,6 +1779,7 @@ ec_heal_block_done (call_frame_t 
*frame, void *cookie, xlator_t *this, fop->heal = NULL; heal->fop = NULL; + heal->error = op_ret < 0 ? op_errno : 0; syncbarrier_wake (heal->data); return 0; } @@ -1789,6 +1790,9 @@ ec_sync_heal_block (call_frame_t *frame, xlator_t *this, ec_heal_t *heal) ec_heal_block (frame, this, heal->bad|heal->good, EC_MINIMUM_ONE, ec_heal_block_done, heal); syncbarrier_wait (heal->data, 1); + if (heal->error != 0) { + return -heal->error; + } if (heal->bad == 0) return -ENOTCONN; return 0; @@ -1814,6 +1818,11 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size, pool = ec->xl->ctx->iobuf_pool; heal->total_size = size; heal->size = iobpool_default_pagesize (pool); + /* We need to adjust the size to a multiple of the stripe size of the + * volume. Otherwise writes would need to fill gaps (head and/or tail) + * with existent data from the bad bricks. This could be garbage on a + * damaged file or it could fail if there aren't enough bricks. */ + heal->size -= heal->size % ec->stripe_size; heal->bad = ec_char_array_to_mask (healed_sinks, ec->nodes); heal->good = ec_char_array_to_mask (sources, ec->nodes); heal->iatt.ia_type = IA_IFREG; -- 1.7.1