cb8e9e
From 0d2dd0b1395397fced99b62dd826d4ab18fb94f1 Mon Sep 17 00:00:00 2001
cb8e9e
From: Xavier Hernandez <xhernandez@datalab.es>
cb8e9e
Date: Fri, 7 Aug 2015 12:37:52 +0200
cb8e9e
Subject: [PATCH 318/320] cluster/ec: Fix write size in self-heal
cb8e9e
cb8e9e
Self-heal was always using a fixed block size to heal a file. This
cb8e9e
was incorrect for dispersed volumes whose number of data bricks is
cb8e9e
not a power of 2.
cb8e9e
cb8e9e
This patch adjusts the block size to a multiple of the stripe size
cb8e9e
of the volume. It also propagates errors detected during the data
cb8e9e
heal to stop healing the file and not mark it as healed.
cb8e9e
cb8e9e
> Change-Id: I9ee3fde98a9e5d6116fd096ceef88686fd1d28e2
cb8e9e
> BUG: 1251446
cb8e9e
> Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
cb8e9e
> Reviewed-on: http://review.gluster.org/11862
cb8e9e
> Tested-by: NetBSD Build System <jenkins@build.gluster.org>
cb8e9e
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
cb8e9e
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
cb8e9e
3.7:    http://review.gluster.org/11869
cb8e9e
cb8e9e
BUG: 1241862
cb8e9e
Change-Id: I3bbe6ed6ff60b3efcc08d7425678bb9aeb5ddb11
cb8e9e
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/56691
cb8e9e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
cb8e9e
---
cb8e9e
 tests/bugs/disperse/bug-1251446.t |   50 +++++++++++++++++++++++++++++++++++++
cb8e9e
 xlators/cluster/ec/src/ec-data.h  |    1 +
cb8e9e
 xlators/cluster/ec/src/ec-heal.c  |    9 ++++++
cb8e9e
 3 files changed, 60 insertions(+), 0 deletions(-)
cb8e9e
 create mode 100644 tests/bugs/disperse/bug-1251446.t
cb8e9e
cb8e9e
diff --git a/tests/bugs/disperse/bug-1251446.t b/tests/bugs/disperse/bug-1251446.t
cb8e9e
new file mode 100644
cb8e9e
index 0000000..f805539
cb8e9e
--- /dev/null
cb8e9e
+++ b/tests/bugs/disperse/bug-1251446.t
cb8e9e
@@ -0,0 +1,50 @@
cb8e9e
+#!/bin/bash
cb8e9e
+
cb8e9e
+. $(dirname $0)/../../include.rc
cb8e9e
+. $(dirname $0)/../../volume.rc
cb8e9e
+
cb8e9e
+cleanup
cb8e9e
+
cb8e9e
+TEST glusterd
cb8e9e
+TEST pidof glusterd
cb8e9e
+TEST $CLI volume create $V0 disperse 4 redundancy 1 $H0:$B0/${V0}{0..3}
cb8e9e
+TEST $CLI volume start $V0
cb8e9e
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
cb8e9e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
cb8e9e
+
cb8e9e
+TEST dd if=/dev/urandom of=$M0/test1 bs=1024k count=2
cb8e9e
+cs=$(sha1sum $M0/test1 | awk '{ print $1 }')
cb8e9e
+
cb8e9e
+TEST kill_brick $V0 $H0 $B0/${V0}0
cb8e9e
+EXPECT '3' online_brick_count
cb8e9e
+
cb8e9e
+TEST cp $M0/test1 $M0/test2
cb8e9e
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
cb8e9e
+
cb8e9e
+TEST $CLI volume start $V0 force
cb8e9e
+EXPECT '4' online_brick_count
cb8e9e
+
cb8e9e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
cb8e9e
+TEST $CLI volume heal $V0 full
cb8e9e
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
cb8e9e
+
cb8e9e
+EXPECT "699392" stat -c "%s" $B0/${V0}0/test2
cb8e9e
+
cb8e9e
+# force cache clear
cb8e9e
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
cb8e9e
+TEST $CLI volume stop $V0
cb8e9e
+TEST $CLI volume start $V0
cb8e9e
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
cb8e9e
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
cb8e9e
+
cb8e9e
+TEST kill_brick $V0 $H0 $B0/${V0}3
cb8e9e
+EXPECT '3' online_brick_count
cb8e9e
+
cb8e9e
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
cb8e9e
+
cb8e9e
+## cleanup
cb8e9e
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
cb8e9e
+TEST $CLI volume stop $V0
cb8e9e
+TEST $CLI volume delete $V0
cb8e9e
+
cb8e9e
+cleanup;
cb8e9e
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
cb8e9e
index ec470e9..1008706 100644
cb8e9e
--- a/xlators/cluster/ec/src/ec-data.h
cb8e9e
+++ b/xlators/cluster/ec/src/ec-data.h
cb8e9e
@@ -285,6 +285,7 @@ struct _ec_heal
cb8e9e
     fd_t             *fd;
cb8e9e
     int32_t           partial;
cb8e9e
     int32_t           done;
cb8e9e
+    int32_t           error;
cb8e9e
     gf_boolean_t      nameheal;
cb8e9e
     uintptr_t         available;
cb8e9e
     uintptr_t         good;
cb8e9e
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
cb8e9e
index a7c97a5..fde7e31 100644
cb8e9e
--- a/xlators/cluster/ec/src/ec-heal.c
cb8e9e
+++ b/xlators/cluster/ec/src/ec-heal.c
cb8e9e
@@ -1779,6 +1779,7 @@ ec_heal_block_done (call_frame_t *frame, void *cookie, xlator_t *this,
cb8e9e
 
cb8e9e
         fop->heal = NULL;
cb8e9e
         heal->fop = NULL;
cb8e9e
+        heal->error = op_ret < 0 ? op_errno : 0;
cb8e9e
         syncbarrier_wake (heal->data);
cb8e9e
         return 0;
cb8e9e
 }
cb8e9e
@@ -1789,6 +1790,9 @@ ec_sync_heal_block (call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
cb8e9e
         ec_heal_block (frame, this, heal->bad|heal->good, EC_MINIMUM_ONE,
cb8e9e
                        ec_heal_block_done, heal);
cb8e9e
         syncbarrier_wait (heal->data, 1);
cb8e9e
+        if (heal->error != 0) {
cb8e9e
+                return -heal->error;
cb8e9e
+        }
cb8e9e
         if (heal->bad == 0)
cb8e9e
                 return -ENOTCONN;
cb8e9e
         return 0;
cb8e9e
@@ -1814,6 +1818,11 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
cb8e9e
         pool = ec->xl->ctx->iobuf_pool;
cb8e9e
         heal->total_size = size;
cb8e9e
         heal->size = iobpool_default_pagesize (pool);
cb8e9e
+        /* We need to adjust the size to a multiple of the stripe size of the
cb8e9e
+         * volume. Otherwise writes would need to fill gaps (head and/or tail)
cb8e9e
+         * with existent data from the bad bricks. This could be garbage on a
cb8e9e
+         * damaged file or it could fail if there aren't enough bricks. */
cb8e9e
+        heal->size -= heal->size % ec->stripe_size;
cb8e9e
         heal->bad       = ec_char_array_to_mask (healed_sinks, ec->nodes);
cb8e9e
         heal->good      = ec_char_array_to_mask (sources, ec->nodes);
cb8e9e
         heal->iatt.ia_type = IA_IFREG;
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e