Blob Blame History Raw
From 98578c10146e7e0e837771b050f884d8c0b3a3d3 Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Mon, 28 Nov 2016 13:42:33 +0530
Subject: [PATCH 346/361] cluster/ec: Healing should not start if only "data"
 bricks are UP

Problem: In a disperse volume with a "K+R" configuration, where
"K" is the number of data bricks and "R" is the number of redundancy
bricks (total number of bricks, N = K+R), if only K bricks are UP,
we should NOT start the heal process. This is because the bricks that
are supposed to be healed are not UP, so starting a heal would
unnecessarily eat up resources.

Solution: Check xl_up_count and start the heal process only
if it is greater than ec->fragments (the number of data bricks).

mainline:
> BUG: 1399072
> Reviewed-on: http://review.gluster.org/15937
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
(cherry picked from commit e64227dc7c70e91f662f4bab32e4d81c76cbb8e8)

BUG: 1396010
Change-Id: I8579f39cfb47b65ff0f76e623b048bd67b15473b
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/101286
Tested-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/cluster/ec/src/ec-heald.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 9860f10..ffb78d5 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -330,18 +330,20 @@ ec_shd_index_healer (void *data)
 
         healer = data;
         THIS = this = healer->this;
+        ec_t *ec = this->private;
 
         for (;;) {
                 ec_shd_healer_wait (healer);
 
                 ASSERT_LOCAL(this, healer);
 
-                gf_msg_debug (this->name, 0,
-                        "starting index sweep on subvol %s",
-                        ec_subvol_name (this, healer->subvol));
-
-                ec_shd_index_sweep (healer);
 
+                if (ec->xl_up_count > ec->fragments) {
+                        gf_msg_debug (this->name, 0,
+                                "starting index sweep on subvol %s",
+                                ec_subvol_name (this, healer->subvol));
+                        ec_shd_index_sweep (healer);
+                }
                 gf_msg_debug (this->name, 0,
                         "finished index sweep on subvol %s",
                         ec_subvol_name (this, healer->subvol));
@@ -362,6 +364,7 @@ ec_shd_full_healer (void *data)
 
         healer = data;
         THIS = this = healer->this;
+        ec_t *ec = this->private;
 
         rootloc.inode = this->itable->root;
         for (;;) {
@@ -378,13 +381,16 @@ ec_shd_full_healer (void *data)
 
                 ASSERT_LOCAL(this, healer);
 
-                gf_msg (this->name, GF_LOG_INFO, 0,
-                        EC_MSG_FULL_SWEEP_START,
-                        "starting full sweep on subvol %s",
-                        ec_subvol_name (this, healer->subvol));
 
-                ec_shd_selfheal (healer, healer->subvol, &rootloc);
-                ec_shd_full_sweep (healer, this->itable->root);
+                if (ec->xl_up_count > ec->fragments) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                EC_MSG_FULL_SWEEP_START,
+                                "starting full sweep on subvol %s",
+                                ec_subvol_name (this, healer->subvol));
+
+                        ec_shd_selfheal (healer, healer->subvol, &rootloc);
+                        ec_shd_full_sweep (healer, this->itable->root);
+                }
 
                 gf_msg (this->name, GF_LOG_INFO, 0,
                         EC_MSG_FULL_SWEEP_STOP,
-- 
1.8.3.1