From 7bf42aafad24b945acf0affa3bf7387ebaab4ea4 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Wed, 4 May 2016 19:05:28 +0530
Subject: [PATCH 130/139] cluster/afr: Do heals with shd pid
Multi-threaded healing doesn't create the synctask with the shd pid; this
leads to healing problems when the quota is exceeded.
>BUG: 1332994
>Change-Id: I80f57c1923756f3298730b8820498127024e1209
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Reviewed-on: http://review.gluster.org/14211
>Smoke: Gluster Build System <jenkins@build.gluster.com>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
>Reviewed-by: Ravishankar N <ravishankar@redhat.com>
BUG: 1332199
Change-Id: I8979b80067214804a24d5a25ae0cd3e3e15faacc
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/73766
---
libglusterfs/src/syncop-utils.c | 19 ++++++++-------
libglusterfs/src/syncop-utils.h | 6 ++--
tests/basic/afr/heal-quota.t | 35 ++++++++++++++++++++++++++++++
xlators/cluster/afr/src/afr-self-heald.c | 11 ++++++++-
4 files changed, 58 insertions(+), 13 deletions(-)
create mode 100644 tests/basic/afr/heal-quota.t
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index 5e6b9fa..8998c06 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -289,10 +289,11 @@ _dir_scan_job_fn (void *data)
}
static int
-_run_dir_scan_task (xlator_t *subvol, loc_t *parent, gf_dirent_t *q,
- gf_dirent_t *entry, int *retval, pthread_mutex_t *mut,
- pthread_cond_t *cond, uint32_t *jobs_running,
- uint32_t *qlen, syncop_dir_scan_fn_t fn, void *data)
+_run_dir_scan_task (call_frame_t *frame, xlator_t *subvol, loc_t *parent,
+ gf_dirent_t *q, gf_dirent_t *entry, int *retval,
+ pthread_mutex_t *mut, pthread_cond_t *cond,
+ uint32_t *jobs_running, uint32_t *qlen,
+ syncop_dir_scan_fn_t fn, void *data)
{
int ret = 0;
struct syncop_dir_scan_data *scan_data = NULL;
@@ -318,7 +319,7 @@ _run_dir_scan_task (xlator_t *subvol, loc_t *parent, gf_dirent_t *q,
scan_data->retval = retval;
ret = synctask_new (subvol->ctx->env, _dir_scan_job_fn,
- _dir_scan_job_fn_cbk, NULL, scan_data);
+ _dir_scan_job_fn_cbk, frame, scan_data);
out:
if (ret < 0) {
gf_dirent_entry_free (entry);
@@ -334,9 +335,9 @@ out:
}
int
-syncop_mt_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data,
- syncop_dir_scan_fn_t fn, dict_t *xdata, uint32_t max_jobs,
- uint32_t max_qlen)
+syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ void *data, syncop_dir_scan_fn_t fn, dict_t *xdata,
+ uint32_t max_jobs, uint32_t max_qlen)
{
fd_t *fd = NULL;
uint64_t offset = 0;
@@ -433,7 +434,7 @@ syncop_mt_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data,
if (!entry)
continue;
- ret = _run_dir_scan_task (subvol, loc, &q, entry,
+ ret = _run_dir_scan_task (frame, subvol, loc, &q, entry,
&retval, &mut, &cond,
&jobs_running, &qlen, fn, data);
if (ret)
diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/syncop-utils.h
index 52bcfd9..3968d75 100644
--- a/libglusterfs/src/syncop-utils.h
+++ b/libglusterfs/src/syncop-utils.h
@@ -19,9 +19,9 @@ syncop_ftw (xlator_t *subvol, loc_t *loc, int pid, void *data,
void *data));
int
-syncop_mt_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data,
- syncop_dir_scan_fn_t fn, dict_t *xdata, uint32_t max_jobs,
- uint32_t max_qlen);
+syncop_mt_dir_scan (call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ void *data, syncop_dir_scan_fn_t fn, dict_t *xdata,
+ uint32_t max_jobs, uint32_t max_qlen);
int
syncop_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data,
diff --git a/tests/basic/afr/heal-quota.t b/tests/basic/afr/heal-quota.t
new file mode 100644
index 0000000..2663906
--- /dev/null
+++ b/tests/basic/afr/heal-quota.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#This file tests that heal succeeds even when quota is exceeded
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 10MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST touch $M0/a $M0/b
+dd if=/dev/zero of=$M0/b bs=1M count=7
+TEST kill_brick $V0 $H0 $B0/${V0}0
+dd if=/dev/zero of=$M0/a bs=1M count=12 #Expected to fail: exceeds the 10MB quota limit
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index d89692d..2fc1b63 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -435,10 +435,17 @@ afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid)
int ret = 0;
xlator_t *subvol = NULL;
dict_t *xdata = NULL;
+ call_frame_t *frame = NULL;
priv = healer->this->private;
subvol = priv->children[healer->subvol];
+ frame = afr_frame_create (healer->this);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
loc.inode = afr_shd_index_inode (healer->this, subvol, vgfid);
if (!loc.inode) {
gf_msg (healer->this->name, GF_LOG_WARNING,
@@ -454,7 +461,7 @@ afr_shd_index_sweep (struct subvol_healer *healer, char *vgfid)
goto out;
}
- ret = syncop_mt_dir_scan (subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ ret = syncop_mt_dir_scan (frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
healer, afr_shd_index_heal, xdata,
priv->shd.max_threads, priv->shd.wait_qlength);
@@ -466,6 +473,8 @@ out:
if (xdata)
dict_unref (xdata);
+ if (frame)
+ AFR_STACK_DESTROY (frame);
return ret;
}
--
1.7.1