|
|
cb8e9e |
From 1c94a3d3bd4ac1237d497f03a899ba29590fc37f Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
cb8e9e |
Date: Mon, 15 Jun 2015 16:32:06 +0530
|
|
|
cb8e9e |
Subject: [PATCH 139/190] cluster/ec: wind fops on good subvols for access/readdir[p]
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Backport of http://review.gluster.org/11246
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: I1e629a6adc803c4b7164a5a7a81ee5cb1d0e139c
|
|
|
cb8e9e |
BUG: 1228525
|
|
|
cb8e9e |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/51322
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
tests/basic/ec/ec-readdir.t | 30 +++++-
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-combine.c | 12 +--
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-common.c | 28 ++++-
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-common.h | 3 +-
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-data.c | 2 +
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-data.h | 1 +
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-dir-read.c | 207 +++++++++++++++++---------------
|
|
|
cb8e9e |
xlators/cluster/ec/src/ec-inode-read.c | 89 +++++++++++----
|
|
|
cb8e9e |
8 files changed, 232 insertions(+), 140 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/tests/basic/ec/ec-readdir.t b/tests/basic/ec/ec-readdir.t
|
|
|
cb8e9e |
index bbfa2dd..fad101b 100644
|
|
|
cb8e9e |
--- a/tests/basic/ec/ec-readdir.t
|
|
|
cb8e9e |
+++ b/tests/basic/ec/ec-readdir.t
|
|
|
cb8e9e |
@@ -9,12 +9,38 @@ cleanup
|
|
|
cb8e9e |
TEST glusterd
|
|
|
cb8e9e |
TEST pidof glusterd
|
|
|
cb8e9e |
TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
|
|
|
cb8e9e |
+TEST $CLI volume heal $V0 disable
|
|
|
cb8e9e |
TEST $CLI volume start $V0
|
|
|
cb8e9e |
|
|
|
cb8e9e |
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
|
|
|
cb8e9e |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
|
|
|
cb8e9e |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-TEST touch $M0/{1..100}
|
|
|
cb8e9e |
-EXPECT "100" echo $(ls $M0/* | wc -l)
|
|
|
cb8e9e |
+TEST mkdir $M0/d
|
|
|
cb8e9e |
+TEST touch $M0/d/{1..100}
|
|
|
cb8e9e |
+EXPECT "100" echo $(ls $M0/d/* | wc -l)
|
|
|
cb8e9e |
+TEST kill_brick $V0 $H0 $B0/${V0}0
|
|
|
cb8e9e |
+TEST kill_brick $V0 $H0 $B0/${V0}3
|
|
|
cb8e9e |
+TEST rm -rf $M0/d/{1..100}
|
|
|
cb8e9e |
+TEST $CLI volume start $V0 force
|
|
|
cb8e9e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
|
|
|
cb8e9e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
|
|
|
cb8e9e |
+#Do it 3 times so that even with all load balancing, readdir never falls
|
|
|
cb8e9e |
+#on stale bricks
|
|
|
cb8e9e |
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
|
|
|
cb8e9e |
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
|
|
|
cb8e9e |
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
|
|
|
cb8e9e |
+#Do the same test above for creation of entries
|
|
|
cb8e9e |
+TEST mkdir $M0/d1
|
|
|
cb8e9e |
+TEST kill_brick $V0 $H0 $B0/${V0}0
|
|
|
cb8e9e |
+TEST kill_brick $V0 $H0 $B0/${V0}3
|
|
|
cb8e9e |
+TEST touch $M0/d1/{1..100}
|
|
|
cb8e9e |
+TEST $CLI volume start $V0 force
|
|
|
cb8e9e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
|
|
|
cb8e9e |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
|
|
|
cb8e9e |
+#Do it 3 times so that even with all load balancing, readdir never falls
|
|
|
cb8e9e |
+#on stale bricks
|
|
|
cb8e9e |
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
|
|
|
cb8e9e |
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
|
|
|
cb8e9e |
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
|
|
|
cb8e9e |
cleanup
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
|
|
|
cb8e9e |
index a8cfabc..bf698d9 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-combine.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-combine.c
|
|
|
cb8e9e |
@@ -875,7 +875,7 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
|
|
|
cb8e9e |
ec_fop_data_t *fop = newcbk->fop;
|
|
|
cb8e9e |
ec_cbk_data_t *cbk = NULL, *tmp = NULL;
|
|
|
cb8e9e |
struct list_head *item = NULL;
|
|
|
cb8e9e |
- int32_t needed = 0, resume = 0;
|
|
|
cb8e9e |
+ int32_t needed = 0;
|
|
|
cb8e9e |
char str[32];
|
|
|
cb8e9e |
|
|
|
cb8e9e |
LOCK(&fop->lock);
|
|
|
cb8e9e |
@@ -912,12 +912,6 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
|
|
|
cb8e9e |
ec_trace("ANSWER", fop, "combine=%s[%d]",
|
|
|
cb8e9e |
ec_bin(str, sizeof(str), newcbk->mask, 0), newcbk->count);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if ((newcbk->count == fop->expected) && (fop->answer == NULL)) {
|
|
|
cb8e9e |
- fop->answer = newcbk;
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- resume = 1;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
|
|
|
cb8e9e |
if ((fop->mask ^ fop->remaining) == fop->received) {
|
|
|
cb8e9e |
needed = fop->minimum - cbk->count;
|
|
|
cb8e9e |
@@ -927,9 +921,5 @@ void ec_combine (ec_cbk_data_t *newcbk, ec_combine_f combine)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (needed > 0) {
|
|
|
cb8e9e |
ec_dispatch_next(fop, newcbk->idx);
|
|
|
cb8e9e |
- } else if (resume) {
|
|
|
cb8e9e |
- ec_update_bad(fop, newcbk->mask);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- ec_resume(fop, 0);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
|
cb8e9e |
index f69234e..4dbbe28 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-common.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
|
cb8e9e |
@@ -186,6 +186,10 @@ void ec_update_bad(ec_fop_data_t * fop, uintptr_t good)
|
|
|
cb8e9e |
ec_t *ec = fop->xl->private;
|
|
|
cb8e9e |
uintptr_t bad;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ /*Don't let fops that do dispatch_one() to update bad*/
|
|
|
cb8e9e |
+ if (fop->expected == 1)
|
|
|
cb8e9e |
+ return;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
bad = ec->xl_up & ~(fop->remaining | good);
|
|
|
cb8e9e |
fop->bad |= bad;
|
|
|
cb8e9e |
fop->good |= good;
|
|
|
cb8e9e |
@@ -314,6 +318,20 @@ void ec_resume_parent(ec_fop_data_t * fop, int32_t error)
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+gf_boolean_t
|
|
|
cb8e9e |
+ec_is_recoverable_error (int32_t op_errno)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ switch (op_errno) {
|
|
|
cb8e9e |
+ case ENOTCONN:
|
|
|
cb8e9e |
+ case ESTALE:
|
|
|
cb8e9e |
+ case ENOENT:
|
|
|
cb8e9e |
+ case EBADFD:/*Opened fd but brick is disconnected*/
|
|
|
cb8e9e |
+ case EIO:/*Backend-fs crash like XFS/ext4 etc*/
|
|
|
cb8e9e |
+ return _gf_true;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ return _gf_false;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
void ec_complete(ec_fop_data_t * fop)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
ec_cbk_data_t * cbk = NULL;
|
|
|
cb8e9e |
@@ -329,14 +347,12 @@ void ec_complete(ec_fop_data_t * fop)
|
|
|
cb8e9e |
if (!list_empty(&fop->cbk_list)) {
|
|
|
cb8e9e |
cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
|
|
|
cb8e9e |
healing_count = ec_bits_count (cbk->mask & fop->healing);
|
|
|
cb8e9e |
+ /* fop shouldn't be treated as success if it is not
|
|
|
cb8e9e |
+ * successful on at least fop->minimum good copies*/
|
|
|
cb8e9e |
if ((cbk->count - healing_count) >= fop->minimum) {
|
|
|
cb8e9e |
- /* fop shouldn't be treated as success if it is not
|
|
|
cb8e9e |
- * successful on at least fop->minimum good copies*/
|
|
|
cb8e9e |
- if ((cbk->op_ret >= 0) || (cbk->op_errno != ENOTCONN)) {
|
|
|
cb8e9e |
- fop->answer = cbk;
|
|
|
cb8e9e |
+ fop->answer = cbk;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- update = 1;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ update = 1;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
|
|
|
cb8e9e |
index e3f01ca..3334a7b 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-common.h
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-common.h
|
|
|
cb8e9e |
@@ -75,7 +75,7 @@ typedef enum {
|
|
|
cb8e9e |
#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217
|
|
|
cb8e9e |
#define EC_STATE_HEAL_DISPATCH 218
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-int32_t ec_dispatch_one_retry(ec_fop_data_t *fop, int32_t idx, int32_t op_ret);
|
|
|
cb8e9e |
+int32_t ec_dispatch_one_retry (ec_fop_data_t *fop, int32_t idx, int32_t op_ret);
|
|
|
cb8e9e |
int32_t ec_dispatch_next(ec_fop_data_t * fop, int32_t idx);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
void ec_complete(ec_fop_data_t * fop);
|
|
|
cb8e9e |
@@ -111,5 +111,6 @@ void ec_resume(ec_fop_data_t * fop, int32_t error);
|
|
|
cb8e9e |
void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
void ec_manager(ec_fop_data_t * fop, int32_t error);
|
|
|
cb8e9e |
+gf_boolean_t ec_is_recoverable_error (int32_t op_errno);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
#endif /* __EC_COMMON_H__ */
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
|
|
|
cb8e9e |
index 047ccd5..72f3b0b 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-data.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-data.c
|
|
|
cb8e9e |
@@ -59,6 +59,7 @@ ec_cbk_data_t * ec_cbk_data_allocate(call_frame_t * frame, xlator_t * this,
|
|
|
cb8e9e |
cbk->count = 1;
|
|
|
cb8e9e |
cbk->op_ret = op_ret;
|
|
|
cb8e9e |
cbk->op_errno = op_errno;
|
|
|
cb8e9e |
+ INIT_LIST_HEAD (&cbk->entries.list);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
LOCK(&fop->lock);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -92,6 +93,7 @@ void ec_cbk_data_destroy(ec_cbk_data_t * cbk)
|
|
|
cb8e9e |
iobref_unref(cbk->buffers);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
GF_FREE(cbk->vector);
|
|
|
cb8e9e |
+ gf_dirent_free (&cbk->entries);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
mem_put(cbk);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
|
|
|
cb8e9e |
index 8f6d5de..670b3b8 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-data.h
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-data.h
|
|
|
cb8e9e |
@@ -266,6 +266,7 @@ struct _ec_cbk_data
|
|
|
cb8e9e |
struct iovec * vector;
|
|
|
cb8e9e |
struct iobref * buffers;
|
|
|
cb8e9e |
uint64_t dirty[2];
|
|
|
cb8e9e |
+ gf_dirent_t entries;
|
|
|
cb8e9e |
};
|
|
|
cb8e9e |
|
|
|
cb8e9e |
struct _ec_heal
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
|
|
|
cb8e9e |
index 7821878..69df4d2 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-dir-read.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-dir-read.c
|
|
|
cb8e9e |
@@ -316,34 +316,36 @@ out:
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/* FOP: readdir */
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries)
|
|
|
cb8e9e |
+void ec_adjust_readdirp (ec_t *ec, int32_t idx, gf_dirent_t *entries)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
gf_dirent_t * entry;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
list_for_each_entry(entry, &entries->list, list)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
+ if (!entry->inode)
|
|
|
cb8e9e |
+ continue;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
if (entry->d_stat.ia_type == IA_IFREG)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
if ((entry->dict == NULL) ||
|
|
|
cb8e9e |
(ec_dict_del_number(entry->dict, EC_XATTR_SIZE,
|
|
|
cb8e9e |
- &entry->d_stat.ia_size) != 0))
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- gf_log(ec->xl->name, GF_LOG_WARNING, "Unable to get exact "
|
|
|
cb8e9e |
- "file size.");
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- entry->d_stat.ia_size *= ec->fragments;
|
|
|
cb8e9e |
+ &entry->d_stat.ia_size) != 0)) {
|
|
|
cb8e9e |
+ inode_unref (entry->inode);
|
|
|
cb8e9e |
+ entry->inode = NULL;
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ ec_iatt_rebuild(ec, &entry->d_stat, 1, 1);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- ec_iatt_rebuild(ec, &entry->d_stat, 1, 1);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-int32_t ec_readdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
cb8e9e |
- int32_t op_ret, int32_t op_errno, gf_dirent_t * entries,
|
|
|
cb8e9e |
- dict_t * xdata)
|
|
|
cb8e9e |
+int32_t
|
|
|
cb8e9e |
+ec_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
+ int32_t op_ret, int32_t op_errno,
|
|
|
cb8e9e |
+ gf_dirent_t *entries, dict_t *xdata)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- ec_fop_data_t * fop = NULL;
|
|
|
cb8e9e |
+ ec_fop_data_t *fop = NULL;
|
|
|
cb8e9e |
+ ec_cbk_data_t *cbk = NULL;
|
|
|
cb8e9e |
int32_t idx = (int32_t)(uintptr_t)cookie;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
VALIDATE_OR_GOTO(this, out);
|
|
|
cb8e9e |
@@ -356,18 +358,15 @@ int32_t ec_readdir_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
cb8e9e |
ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
|
|
|
cb8e9e |
frame, op_ret, op_errno);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (op_ret > 0)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- ec_adjust_readdir(fop->xl->private, idx, entries);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (!ec_dispatch_one_retry(fop, idx, op_ret))
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- if (fop->cbks.readdir != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- fop->cbks.readdir(fop->req_frame, fop, this, op_ret, op_errno,
|
|
|
cb8e9e |
- entries, xdata);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ cbk = ec_cbk_data_allocate (frame, this, fop, fop->id,
|
|
|
cb8e9e |
+ idx, op_ret, op_errno);
|
|
|
cb8e9e |
+ if (cbk) {
|
|
|
cb8e9e |
+ if (xdata)
|
|
|
cb8e9e |
+ cbk->xdata = dict_ref (xdata);
|
|
|
cb8e9e |
+ if (cbk->op_ret >= 0)
|
|
|
cb8e9e |
+ list_splice_init (&entries->list,
|
|
|
cb8e9e |
+ &cbk->entries.list);
|
|
|
cb8e9e |
+ ec_combine (cbk, NULL);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
@@ -383,14 +382,15 @@ void ec_wind_readdir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
ec_trace("WIND", fop, "idx=%d", idx);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- STACK_WIND_COOKIE(fop->frame, ec_readdir_cbk, (void *)(uintptr_t)idx,
|
|
|
cb8e9e |
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
|
|
|
cb8e9e |
ec->xl_list[idx], ec->xl_list[idx]->fops->readdir,
|
|
|
cb8e9e |
fop->fd, fop->size, fop->offset, fop->xdata);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- ec_fd_t *ctx;
|
|
|
cb8e9e |
+ ec_fd_t *ctx = NULL;
|
|
|
cb8e9e |
+ ec_cbk_data_t *cbk = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
switch (state)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
@@ -404,27 +404,21 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
|
|
|
cb8e9e |
return EC_STATE_REPORT;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (fop->xdata == NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- fop->xdata = dict_new();
|
|
|
cb8e9e |
- if (fop->xdata == NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
|
|
|
cb8e9e |
- "readdirp request");
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- fop->error = EIO;
|
|
|
cb8e9e |
+ if (fop->id == GF_FOP_READDIRP) {
|
|
|
cb8e9e |
+ if (fop->xdata == NULL) {
|
|
|
cb8e9e |
+ fop->xdata = dict_new();
|
|
|
cb8e9e |
+ if (fop->xdata == NULL) {
|
|
|
cb8e9e |
+ fop->error = EIO;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- return EC_STATE_REPORT;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
- if (dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0) != 0)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to prepare "
|
|
|
cb8e9e |
- "readdirp request");
|
|
|
cb8e9e |
+ return EC_STATE_REPORT;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- fop->error = EIO;
|
|
|
cb8e9e |
+ if (dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0)) {
|
|
|
cb8e9e |
+ fop->error = EIO;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- return EC_STATE_REPORT;
|
|
|
cb8e9e |
+ return EC_STATE_REPORT;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (fop->offset != 0)
|
|
|
cb8e9e |
@@ -440,37 +434,92 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
|
|
|
cb8e9e |
return EC_STATE_REPORT;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
fop->mask &= 1ULL << idx;
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO);
|
|
|
cb8e9e |
+ ec_lock(fop);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- /* Fall through */
|
|
|
cb8e9e |
+ return EC_STATE_DISPATCH;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
case EC_STATE_DISPATCH:
|
|
|
cb8e9e |
ec_dispatch_one(fop);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ return EC_STATE_PREPARE_ANSWER;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ case EC_STATE_PREPARE_ANSWER:
|
|
|
cb8e9e |
+ cbk = fop->answer;
|
|
|
cb8e9e |
+ if (cbk) {
|
|
|
cb8e9e |
+ if ((cbk->op_ret < 0) &&
|
|
|
cb8e9e |
+ ec_is_recoverable_error (cbk->op_errno)) {
|
|
|
cb8e9e |
+ GF_ASSERT (fop->mask & (1ULL<<cbk->idx));
|
|
|
cb8e9e |
+ fop->mask ^= (1ULL << cbk->idx);
|
|
|
cb8e9e |
+ if (fop->mask == 0)
|
|
|
cb8e9e |
+ return EC_STATE_REPORT;
|
|
|
cb8e9e |
+ return EC_STATE_DISPATCH;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ if ((cbk->op_ret > 0) && (fop->id == GF_FOP_READDIRP)) {
|
|
|
cb8e9e |
+ ec_adjust_readdirp (fop->xl->private, cbk->idx,
|
|
|
cb8e9e |
+ &cbk->entries);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ ec_fop_set_error(fop, EIO);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
return EC_STATE_REPORT;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ case EC_STATE_REPORT:
|
|
|
cb8e9e |
+ cbk = fop->answer;
|
|
|
cb8e9e |
+ GF_ASSERT (cbk);
|
|
|
cb8e9e |
+ if (fop->id == GF_FOP_READDIR) {
|
|
|
cb8e9e |
+ if (fop->cbks.readdir != NULL) {
|
|
|
cb8e9e |
+ fop->cbks.readdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
|
|
|
cb8e9e |
+ cbk->op_errno, &cbk->entries, cbk->xdata);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ if (fop->cbks.readdirp != NULL) {
|
|
|
cb8e9e |
+ fop->cbks.readdirp(fop->req_frame, fop, fop->xl,
|
|
|
cb8e9e |
+ cbk->op_ret, cbk->op_errno,
|
|
|
cb8e9e |
+ &cbk->entries, cbk->xdata);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ if (fop->offset == 0)
|
|
|
cb8e9e |
+ return EC_STATE_LOCK_REUSE;
|
|
|
cb8e9e |
+ else
|
|
|
cb8e9e |
+ return EC_STATE_END;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
case -EC_STATE_INIT:
|
|
|
cb8e9e |
- if (fop->id == GF_FOP_READDIR)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- if (fop->cbks.readdir != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
+ case -EC_STATE_LOCK:
|
|
|
cb8e9e |
+ case -EC_STATE_DISPATCH:
|
|
|
cb8e9e |
+ case -EC_STATE_PREPARE_ANSWER:
|
|
|
cb8e9e |
+ case -EC_STATE_REPORT:
|
|
|
cb8e9e |
+ if (fop->id == GF_FOP_READDIR) {
|
|
|
cb8e9e |
+ if (fop->cbks.readdir != NULL) {
|
|
|
cb8e9e |
fop->cbks.readdir(fop->req_frame, fop, fop->xl, -1,
|
|
|
cb8e9e |
fop->error, NULL, NULL);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
- else
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- if (fop->cbks.readdirp != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ if (fop->cbks.readdirp != NULL) {
|
|
|
cb8e9e |
fop->cbks.readdirp(fop->req_frame, fop, fop->xl, -1,
|
|
|
cb8e9e |
fop->error, NULL, NULL);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+ if (fop->offset == 0)
|
|
|
cb8e9e |
+ return EC_STATE_LOCK_REUSE;
|
|
|
cb8e9e |
+ else
|
|
|
cb8e9e |
+ return EC_STATE_END;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- case EC_STATE_REPORT:
|
|
|
cb8e9e |
- case -EC_STATE_REPORT:
|
|
|
cb8e9e |
- return EC_STATE_END;
|
|
|
cb8e9e |
+ case -EC_STATE_LOCK_REUSE:
|
|
|
cb8e9e |
+ case EC_STATE_LOCK_REUSE:
|
|
|
cb8e9e |
+ GF_ASSERT (fop->offset == 0);
|
|
|
cb8e9e |
+ ec_lock_reuse(fop);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return EC_STATE_UNLOCK;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ case -EC_STATE_UNLOCK:
|
|
|
cb8e9e |
+ case EC_STATE_UNLOCK:
|
|
|
cb8e9e |
+ GF_ASSERT (fop->offset == 0);
|
|
|
cb8e9e |
+ ec_unlock(fop);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return EC_STATE_END;
|
|
|
cb8e9e |
default:
|
|
|
cb8e9e |
gf_log(fop->xl->name, GF_LOG_ERROR, "Unhandled state %d for %s",
|
|
|
cb8e9e |
state, ec_fop_name(fop->id));
|
|
|
cb8e9e |
@@ -544,51 +593,11 @@ out:
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/* FOP: readdirp */
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-int32_t ec_readdirp_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
cb8e9e |
- int32_t op_ret, int32_t op_errno,
|
|
|
cb8e9e |
- gf_dirent_t * entries, dict_t * xdata)
|
|
|
cb8e9e |
-{
|
|
|
cb8e9e |
- ec_fop_data_t * fop = NULL;
|
|
|
cb8e9e |
- int32_t idx = (int32_t)(uintptr_t)cookie;
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- VALIDATE_OR_GOTO(this, out);
|
|
|
cb8e9e |
- GF_VALIDATE_OR_GOTO(this->name, frame, out);
|
|
|
cb8e9e |
- GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
|
|
|
cb8e9e |
- GF_VALIDATE_OR_GOTO(this->name, this->private, out);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- fop = frame->local;
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
|
|
|
cb8e9e |
- frame, op_ret, op_errno);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (op_ret > 0)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- ec_adjust_readdir(fop->xl->private, idx, entries);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- if (!ec_dispatch_one_retry(fop, idx, op_ret))
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- if (fop->cbks.readdirp != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- fop->cbks.readdirp(fop->req_frame, fop, this, op_ret, op_errno,
|
|
|
cb8e9e |
- entries, xdata);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
-out:
|
|
|
cb8e9e |
- if (fop != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- ec_complete(fop);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
- return 0;
|
|
|
cb8e9e |
-}
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
void ec_wind_readdirp(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
ec_trace("WIND", fop, "idx=%d", idx);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- STACK_WIND_COOKIE(fop->frame, ec_readdirp_cbk, (void *)(uintptr_t)idx,
|
|
|
cb8e9e |
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
|
|
|
cb8e9e |
ec->xl_list[idx], ec->xl_list[idx]->fops->readdirp,
|
|
|
cb8e9e |
fop->fd, fop->size, fop->offset, fop->xdata);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
|
|
|
cb8e9e |
index ef2170f..1f91391 100644
|
|
|
cb8e9e |
--- a/xlators/cluster/ec/src/ec-inode-read.c
|
|
|
cb8e9e |
+++ b/xlators/cluster/ec/src/ec-inode-read.c
|
|
|
cb8e9e |
@@ -22,7 +22,8 @@
|
|
|
cb8e9e |
int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
cb8e9e |
int32_t op_ret, int32_t op_errno, dict_t * xdata)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- ec_fop_data_t * fop = NULL;
|
|
|
cb8e9e |
+ ec_fop_data_t *fop = NULL;
|
|
|
cb8e9e |
+ ec_cbk_data_t *cbk = NULL;
|
|
|
cb8e9e |
int32_t idx = (int32_t)(uintptr_t)cookie;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
VALIDATE_OR_GOTO(this, out);
|
|
|
cb8e9e |
@@ -35,19 +36,18 @@ int32_t ec_access_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
|
|
|
cb8e9e |
ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx,
|
|
|
cb8e9e |
frame, op_ret, op_errno);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (!ec_dispatch_one_retry(fop, idx, op_ret))
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- if (fop->cbks.access != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- fop->cbks.access(fop->req_frame, fop, this, op_ret, op_errno,
|
|
|
cb8e9e |
- xdata);
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ cbk = ec_cbk_data_allocate (frame, this, fop, GF_FOP_ACCESS,
|
|
|
cb8e9e |
+ idx, op_ret, op_errno);
|
|
|
cb8e9e |
+ if (cbk) {
|
|
|
cb8e9e |
+ if (xdata)
|
|
|
cb8e9e |
+ cbk->xdata = dict_ref (xdata);
|
|
|
cb8e9e |
+ ec_combine (cbk, NULL);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
if (fop != NULL)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- ec_complete(fop);
|
|
|
cb8e9e |
+ ec_complete (fop);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
@@ -62,25 +62,72 @@ void ec_wind_access(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
|
|
|
cb8e9e |
&fop->loc[0], fop->int32, fop->xdata);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-int32_t ec_manager_access(ec_fop_data_t * fop, int32_t state)
|
|
|
cb8e9e |
+int32_t
|
|
|
cb8e9e |
+ec_manager_access(ec_fop_data_t *fop, int32_t state)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- switch (state)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
+ ec_cbk_data_t *cbk = NULL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ switch (state) {
|
|
|
cb8e9e |
case EC_STATE_INIT:
|
|
|
cb8e9e |
+ case EC_STATE_LOCK:
|
|
|
cb8e9e |
+ ec_lock_prepare_inode (fop, &fop->loc[0], EC_QUERY_INFO);
|
|
|
cb8e9e |
+ ec_lock (fop);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return EC_STATE_DISPATCH;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
case EC_STATE_DISPATCH:
|
|
|
cb8e9e |
- ec_dispatch_one(fop);
|
|
|
cb8e9e |
+ ec_dispatch_one (fop);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ return EC_STATE_PREPARE_ANSWER;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ case EC_STATE_PREPARE_ANSWER:
|
|
|
cb8e9e |
+ cbk = fop->answer;
|
|
|
cb8e9e |
+ if (cbk) {
|
|
|
cb8e9e |
+ if ((cbk->op_ret < 0) && ec_is_recoverable_error (cbk->op_errno)) {
|
|
|
cb8e9e |
+ GF_ASSERT (fop->mask & (1ULL<<cbk->idx));
|
|
|
cb8e9e |
+ fop->mask ^= (1ULL << cbk->idx);
|
|
|
cb8e9e |
+ if (fop->mask == 0)
|
|
|
cb8e9e |
+ return EC_STATE_REPORT;
|
|
|
cb8e9e |
+ return EC_STATE_DISPATCH;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ } else {
|
|
|
cb8e9e |
+ ec_fop_set_error(fop, EIO);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
return EC_STATE_REPORT;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- case -EC_STATE_INIT:
|
|
|
cb8e9e |
- if (fop->cbks.access != NULL)
|
|
|
cb8e9e |
- {
|
|
|
cb8e9e |
- fop->cbks.access(fop->req_frame, fop, fop->xl, -1, fop->error,
|
|
|
cb8e9e |
- NULL);
|
|
|
cb8e9e |
+ case EC_STATE_REPORT:
|
|
|
cb8e9e |
+ cbk = fop->answer;
|
|
|
cb8e9e |
+ GF_ASSERT (cbk);
|
|
|
cb8e9e |
+ if (fop->cbks.access != NULL) {
|
|
|
cb8e9e |
+ if (cbk) {
|
|
|
cb8e9e |
+ fop->cbks.access(fop->req_frame, fop, fop->xl,
|
|
|
cb8e9e |
+ cbk->op_ret, cbk->op_errno,
|
|
|
cb8e9e |
+ cbk->xdata);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+ return EC_STATE_LOCK_REUSE;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ case -EC_STATE_INIT:
|
|
|
cb8e9e |
+ case -EC_STATE_LOCK:
|
|
|
cb8e9e |
+ case -EC_STATE_DISPATCH:
|
|
|
cb8e9e |
+ case -EC_STATE_PREPARE_ANSWER:
|
|
|
cb8e9e |
case -EC_STATE_REPORT:
|
|
|
cb8e9e |
- case EC_STATE_REPORT:
|
|
|
cb8e9e |
+ if (fop->cbks.access != NULL) {
|
|
|
cb8e9e |
+ fop->cbks.access(fop->req_frame, fop, fop->xl, -1,
|
|
|
cb8e9e |
+ fop->error, NULL);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ return -EC_STATE_LOCK_REUSE;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ case -EC_STATE_LOCK_REUSE:
|
|
|
cb8e9e |
+ case EC_STATE_LOCK_REUSE:
|
|
|
cb8e9e |
+ ec_lock_reuse(fop);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return EC_STATE_UNLOCK;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ case -EC_STATE_UNLOCK:
|
|
|
cb8e9e |
+ case EC_STATE_UNLOCK:
|
|
|
cb8e9e |
+ ec_unlock(fop);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
return EC_STATE_END;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
default:
|
|
|
cb8e9e |
@@ -88,7 +135,7 @@ int32_t ec_manager_access(ec_fop_data_t * fop, int32_t state)
|
|
|
cb8e9e |
state, ec_fop_name(fop->id));
|
|
|
cb8e9e |
|
|
|
cb8e9e |
return EC_STATE_END;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
void ec_access(call_frame_t * frame, xlator_t * this, uintptr_t target,
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|