7f4c2a
From 0d2dd0b1395397fced99b62dd826d4ab18fb94f1 Mon Sep 17 00:00:00 2001
7f4c2a
From: Xavier Hernandez <xhernandez@datalab.es>
7f4c2a
Date: Fri, 7 Aug 2015 12:37:52 +0200
7f4c2a
Subject: [PATCH 318/320] cluster/ec: Fix write size in self-heal
7f4c2a
7f4c2a
Self-heal was always using a fixed block size to heal a file. This
7f4c2a
was incorrect for dispersed volumes whose number of data bricks is
7f4c2a
not a power of 2.
7f4c2a
7f4c2a
This patch adjusts the block size to a multiple of the stripe size
7f4c2a
of the volume. It also propagates errors detected during the data
7f4c2a
heal, so that healing of the file stops and it is not marked as healed.
7f4c2a
7f4c2a
> Change-Id: I9ee3fde98a9e5d6116fd096ceef88686fd1d28e2
7f4c2a
> BUG: 1251446
7f4c2a
> Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
7f4c2a
> Reviewed-on: http://review.gluster.org/11862
7f4c2a
> Tested-by: NetBSD Build System <jenkins@build.gluster.org>
7f4c2a
> Tested-by: Gluster Build System <jenkins@build.gluster.com>
7f4c2a
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
7f4c2a
7f4c2a
3.7:    http://review.gluster.org/11869
7f4c2a
7f4c2a
BUG: 1241862
7f4c2a
Change-Id: I3bbe6ed6ff60b3efcc08d7425678bb9aeb5ddb11
7f4c2a
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/56691
7f4c2a
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
7f4c2a
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
7f4c2a
---
7f4c2a
 tests/bugs/disperse/bug-1251446.t |   50 +++++++++++++++++++++++++++++++++++++
7f4c2a
 xlators/cluster/ec/src/ec-data.h  |    1 +
7f4c2a
 xlators/cluster/ec/src/ec-heal.c  |    9 ++++++
7f4c2a
 3 files changed, 60 insertions(+), 0 deletions(-)
7f4c2a
 create mode 100644 tests/bugs/disperse/bug-1251446.t
7f4c2a
7f4c2a
diff --git a/tests/bugs/disperse/bug-1251446.t b/tests/bugs/disperse/bug-1251446.t
7f4c2a
new file mode 100644
7f4c2a
index 0000000..f805539
7f4c2a
--- /dev/null
7f4c2a
+++ b/tests/bugs/disperse/bug-1251446.t
7f4c2a
@@ -0,0 +1,50 @@
7f4c2a
+#!/bin/bash
7f4c2a
+
7f4c2a
+. $(dirname $0)/../../include.rc
7f4c2a
+. $(dirname $0)/../../volume.rc
7f4c2a
+
7f4c2a
+cleanup
7f4c2a
+
7f4c2a
+TEST glusterd
7f4c2a
+TEST pidof glusterd
7f4c2a
+TEST $CLI volume create $V0 disperse 4 redundancy 1 $H0:$B0/${V0}{0..3}
7f4c2a
+TEST $CLI volume start $V0
7f4c2a
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
7f4c2a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
7f4c2a
+
7f4c2a
+TEST dd if=/dev/urandom of=$M0/test1 bs=1024k count=2
7f4c2a
+cs=$(sha1sum $M0/test1 | awk '{ print $1 }')
7f4c2a
+
7f4c2a
+TEST kill_brick $V0 $H0 $B0/${V0}0
7f4c2a
+EXPECT '3' online_brick_count
7f4c2a
+
7f4c2a
+TEST cp $M0/test1 $M0/test2
7f4c2a
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
7f4c2a
+
7f4c2a
+TEST $CLI volume start $V0 force
7f4c2a
+EXPECT '4' online_brick_count
7f4c2a
+
7f4c2a
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
7f4c2a
+TEST $CLI volume heal $V0 full
7f4c2a
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
7f4c2a
+
7f4c2a
+EXPECT "699392" stat -c "%s" $B0/${V0}0/test2
7f4c2a
+
7f4c2a
+# force cache clear
7f4c2a
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
7f4c2a
+TEST $CLI volume stop $V0
7f4c2a
+TEST $CLI volume start $V0
7f4c2a
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
7f4c2a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
7f4c2a
+
7f4c2a
+TEST kill_brick $V0 $H0 $B0/${V0}3
7f4c2a
+EXPECT '3' online_brick_count
7f4c2a
+
7f4c2a
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
7f4c2a
+
7f4c2a
+## cleanup
7f4c2a
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
7f4c2a
+TEST $CLI volume stop $V0
7f4c2a
+TEST $CLI volume delete $V0
7f4c2a
+
7f4c2a
+cleanup;
7f4c2a
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
7f4c2a
index ec470e9..1008706 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec-data.h
7f4c2a
+++ b/xlators/cluster/ec/src/ec-data.h
7f4c2a
@@ -285,6 +285,7 @@ struct _ec_heal
7f4c2a
     fd_t             *fd;
7f4c2a
     int32_t           partial;
7f4c2a
     int32_t           done;
7f4c2a
+    int32_t           error;
7f4c2a
     gf_boolean_t      nameheal;
7f4c2a
     uintptr_t         available;
7f4c2a
     uintptr_t         good;
7f4c2a
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
7f4c2a
index a7c97a5..fde7e31 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec-heal.c
7f4c2a
+++ b/xlators/cluster/ec/src/ec-heal.c
7f4c2a
@@ -1779,6 +1779,7 @@ ec_heal_block_done (call_frame_t *frame, void *cookie, xlator_t *this,
7f4c2a
 
7f4c2a
         fop->heal = NULL;
7f4c2a
         heal->fop = NULL;
7f4c2a
+        heal->error = op_ret < 0 ? op_errno : 0;
7f4c2a
         syncbarrier_wake (heal->data);
7f4c2a
         return 0;
7f4c2a
 }
7f4c2a
@@ -1789,6 +1790,9 @@ ec_sync_heal_block (call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
7f4c2a
         ec_heal_block (frame, this, heal->bad|heal->good, EC_MINIMUM_ONE,
7f4c2a
                        ec_heal_block_done, heal);
7f4c2a
         syncbarrier_wait (heal->data, 1);
7f4c2a
+        if (heal->error != 0) {
7f4c2a
+                return -heal->error;
7f4c2a
+        }
7f4c2a
         if (heal->bad == 0)
7f4c2a
                 return -ENOTCONN;
7f4c2a
         return 0;
7f4c2a
@@ -1814,6 +1818,11 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
7f4c2a
         pool = ec->xl->ctx->iobuf_pool;
7f4c2a
         heal->total_size = size;
7f4c2a
         heal->size = iobpool_default_pagesize (pool);
7f4c2a
+        /* We need to adjust the size to a multiple of the stripe size of the
7f4c2a
+         * volume. Otherwise writes would need to fill gaps (head and/or tail)
7f4c2a
+         * with existent data from the bad bricks. This could be garbage on a
7f4c2a
+         * damaged file or it could fail if there aren't enough bricks. */
7f4c2a
+        heal->size -= heal->size % ec->stripe_size;
7f4c2a
         heal->bad       = ec_char_array_to_mask (healed_sinks, ec->nodes);
7f4c2a
         heal->good      = ec_char_array_to_mask (sources, ec->nodes);
7f4c2a
         heal->iatt.ia_type = IA_IFREG;
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a