|
|
3604df |
From 0d9aeed1092e7ce206a43e9fa29134de73731306 Mon Sep 17 00:00:00 2001
|
|
|
3604df |
From: Ashish Pandey <aspandey@redhat.com>
|
|
|
3604df |
Date: Thu, 13 Oct 2016 14:13:51 +0530
|
|
|
3604df |
Subject: [PATCH 098/141] cluster/ec: set/unset dirty flag for data/metadata update
|
|
|
3604df |
|
|
|
3604df |
Currently, for all the update operations, metadata or data,
|
|
|
3604df |
we set the dirty flag at the end of the operation only if
|
|
|
3604df |
a brick is down. This leads to delay in healing and in some
|
|
|
3604df |
cases healing does not happen at all.
|
|
|
3604df |
In this patch we set (+1) the dirty flag
|
|
|
3604df |
at the start of the metadata or data update operations and
|
|
|
3604df |
after successful completion of the fop, we unset (-1) it again.
|
|
|
3604df |
|
|
|
3604df |
>Reviewed-on: http://review.gluster.org/13733
|
|
|
3604df |
>Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
3604df |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
|
|
|
3604df |
>Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
|
3604df |
|
|
|
3604df |
Change-Id: Ide5668bdec7b937a61c5c840cdc79a967598e1e9
|
|
|
3604df |
BUG: 1361513
|
|
|
3604df |
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
|
3604df |
Reviewed-on: https://code.engineering.redhat.com/gerrit/87008
|
|
|
3604df |
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
3604df |
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
3604df |
---
|
|
|
3604df |
tests/basic/ec/ec-background-heals.t | 1 +
|
|
|
3604df |
tests/basic/ec/ec-new-entry.t | 14 ++
|
|
|
3604df |
xlators/cluster/ec/src/ec-common.c | 284 +++++++++++++++++++---------------
|
|
|
3604df |
xlators/cluster/ec/src/ec-common.h | 3 +-
|
|
|
3604df |
xlators/cluster/ec/src/ec-data.h | 3 +-
|
|
|
3604df |
5 files changed, 181 insertions(+), 124 deletions(-)
|
|
|
3604df |
|
|
|
3604df |
diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t
|
|
|
3604df |
index 726e60d..7ac6c0e 100644
|
|
|
3604df |
--- a/tests/basic/ec/ec-background-heals.t
|
|
|
3604df |
+++ b/tests/basic/ec/ec-background-heals.t
|
|
|
3604df |
@@ -23,6 +23,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
|
|
|
3604df |
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
|
|
|
3604df |
EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
|
|
|
3604df |
TEST touch $M0/a
|
|
|
3604df |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" get_pending_heal_count $V0 #One for each active brick
|
|
|
3604df |
TEST kill_brick $V0 $H0 $B0/${V0}2
|
|
|
3604df |
echo abc > $M0/a
|
|
|
3604df |
EXPECT 2 get_pending_heal_count $V0 #One for each active brick
|
|
|
3604df |
diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t
|
|
|
3604df |
index 3a5c2ee..2ba2bf5 100644
|
|
|
3604df |
--- a/tests/basic/ec/ec-new-entry.t
|
|
|
3604df |
+++ b/tests/basic/ec/ec-new-entry.t
|
|
|
3604df |
@@ -12,6 +12,17 @@ function get_md5sum {
|
|
|
3604df |
md5sum $1 | awk '{print $1}'
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+#after replace-brick immediately trusted.ec.version will be absent, so if it
|
|
|
3604df |
+#is present we can assume that heal attempted on root
|
|
|
3604df |
+function root_heal_attempted {
|
|
|
3604df |
+ if [ -z $(get_hex_xattr trusted.ec.version $1) ];
|
|
|
3604df |
+ then
|
|
|
3604df |
+ echo "N";
|
|
|
3604df |
+ else
|
|
|
3604df |
+ echo "Y";
|
|
|
3604df |
+ fi
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
TEST glusterd
|
|
|
3604df |
TEST pidof glusterd
|
|
|
3604df |
TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
|
|
|
3604df |
@@ -23,6 +34,9 @@ touch $M0/11
|
|
|
3604df |
for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done
|
|
|
3604df |
TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
|
|
|
3604df |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
|
|
|
3604df |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
|
|
|
3604df |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
|
|
|
3604df |
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" root_heal_attempted $B0/${V0}6
|
|
|
3604df |
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
3604df |
#ls -l gives "Total" line so number of lines will be 1 more
|
|
|
3604df |
EXPECT "^12$" num_entries $B0/${V0}6
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
|
|
|
3604df |
index 2e6759a..6ff87ad 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-common.c
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-common.c
|
|
|
3604df |
@@ -886,6 +886,27 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
|
|
|
3604df |
return _gf_true;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+gf_boolean_t
|
|
|
3604df |
+ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty)
|
|
|
3604df |
+{
|
|
|
3604df |
+
|
|
|
3604df |
+ gf_boolean_t set_dirty = _gf_false;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) {
|
|
|
3604df |
+ dirty[EC_DATA_TXN] = 1;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) {
|
|
|
3604df |
+ dirty[EC_METADATA_TXN] = 1;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ if (dirty[EC_METADATA_TXN] || dirty[EC_DATA_TXN]) {
|
|
|
3604df |
+ set_dirty = _gf_true;
|
|
|
3604df |
+ }
|
|
|
3604df |
+
|
|
|
3604df |
+ return set_dirty;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
int32_t
|
|
|
3604df |
ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
xlator_t *this, int32_t op_ret, int32_t op_errno,
|
|
|
3604df |
@@ -906,8 +927,8 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
LOCK(&lock->loc.inode->lock);
|
|
|
3604df |
|
|
|
3604df |
list_for_each_entry(link, &lock->owners, owner_list) {
|
|
|
3604df |
- if ((link->fop->flags & EC_FLAG_WAITING_SIZE) != 0) {
|
|
|
3604df |
- link->fop->flags ^= EC_FLAG_WAITING_SIZE;
|
|
|
3604df |
+ if ((link->fop->flags & EC_FLAG_WAITING_XATTROP) != 0) {
|
|
|
3604df |
+ link->fop->flags ^= EC_FLAG_WAITING_XATTROP;
|
|
|
3604df |
|
|
|
3604df |
list_add_tail(&link->fop->cbk_list, &list);
|
|
|
3604df |
}
|
|
|
3604df |
@@ -921,68 +942,70 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
|
|
|
3604df |
goto unlock;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version,
|
|
|
3604df |
- EC_VERSION_SIZE);
|
|
|
3604df |
- if (op_errno != 0) {
|
|
|
3604df |
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
|
|
|
3604df |
- EC_MSG_VER_XATTR_GET_FAIL,
|
|
|
3604df |
- "Unable to get version xattr");
|
|
|
3604df |
-
|
|
|
3604df |
- goto unlock;
|
|
|
3604df |
- }
|
|
|
3604df |
- ctx->post_version[0] += ctx->pre_version[0];
|
|
|
3604df |
- ctx->post_version[1] += ctx->pre_version[1];
|
|
|
3604df |
-
|
|
|
3604df |
- ctx->have_version = _gf_true;
|
|
|
3604df |
-
|
|
|
3604df |
- if (lock->loc.inode->ia_type == IA_IFREG ||
|
|
|
3604df |
- lock->loc.inode->ia_type == IA_INVAL) {
|
|
|
3604df |
- op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size);
|
|
|
3604df |
- if (op_errno != 0) {
|
|
|
3604df |
- if (lock->loc.inode->ia_type == IA_IFREG) {
|
|
|
3604df |
+ if (parent->flags & EC_FLAG_QUERY_METADATA) {
|
|
|
3604df |
+ parent->flags ^= EC_FLAG_QUERY_METADATA;
|
|
|
3604df |
+ op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION,
|
|
|
3604df |
+ ctx->pre_version,
|
|
|
3604df |
+ EC_VERSION_SIZE);
|
|
|
3604df |
+ if (op_errno != 0) {
|
|
|
3604df |
gf_msg (this->name, GF_LOG_ERROR, op_errno,
|
|
|
3604df |
- EC_MSG_SIZE_XATTR_GET_FAIL,
|
|
|
3604df |
- "Unable to get size xattr");
|
|
|
3604df |
-
|
|
|
3604df |
+ EC_MSG_VER_XATTR_GET_FAIL,
|
|
|
3604df |
+ "Unable to get version xattr");
|
|
|
3604df |
goto unlock;
|
|
|
3604df |
}
|
|
|
3604df |
- } else {
|
|
|
3604df |
- ctx->post_size = ctx->pre_size;
|
|
|
3604df |
+ ctx->post_version[0] += ctx->pre_version[0];
|
|
|
3604df |
+ ctx->post_version[1] += ctx->pre_version[1];
|
|
|
3604df |
|
|
|
3604df |
- ctx->have_size = _gf_true;
|
|
|
3604df |
- }
|
|
|
3604df |
+ ctx->have_version = _gf_true;
|
|
|
3604df |
|
|
|
3604df |
- op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config);
|
|
|
3604df |
- if (op_errno != 0) {
|
|
|
3604df |
- if ((lock->loc.inode->ia_type == IA_IFREG) ||
|
|
|
3604df |
- (op_errno != ENODATA)) {
|
|
|
3604df |
- gf_msg (this->name, GF_LOG_ERROR, op_errno,
|
|
|
3604df |
- EC_MSG_CONFIG_XATTR_GET_FAIL,
|
|
|
3604df |
- "Unable to get config xattr");
|
|
|
3604df |
+ if (lock->loc.inode->ia_type == IA_IFREG ||
|
|
|
3604df |
+ lock->loc.inode->ia_type == IA_INVAL) {
|
|
|
3604df |
+ op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE,
|
|
|
3604df |
+ &ctx->pre_size);
|
|
|
3604df |
+ if (op_errno != 0) {
|
|
|
3604df |
+ if (lock->loc.inode->ia_type == IA_IFREG) {
|
|
|
3604df |
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
|
|
|
3604df |
+ EC_MSG_SIZE_XATTR_GET_FAIL,
|
|
|
3604df |
+ "Unable to get size xattr");
|
|
|
3604df |
+ goto unlock;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ ctx->post_size = ctx->pre_size;
|
|
|
3604df |
|
|
|
3604df |
- goto unlock;
|
|
|
3604df |
- }
|
|
|
3604df |
- } else {
|
|
|
3604df |
- if (!ec_config_check(parent, &ctx->config)) {
|
|
|
3604df |
- gf_msg (this->name, GF_LOG_ERROR, EINVAL,
|
|
|
3604df |
- EC_MSG_CONFIG_XATTR_INVALID,
|
|
|
3604df |
- "Invalid config xattr");
|
|
|
3604df |
+ ctx->have_size = _gf_true;
|
|
|
3604df |
+ }
|
|
|
3604df |
|
|
|
3604df |
- op_errno = EINVAL;
|
|
|
3604df |
+ op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG,
|
|
|
3604df |
+ &ctx->config);
|
|
|
3604df |
+ if (op_errno != 0) {
|
|
|
3604df |
+ if ((lock->loc.inode->ia_type == IA_IFREG) ||
|
|
|
3604df |
+ (op_errno != ENODATA)) {
|
|
|
3604df |
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
|
|
|
3604df |
+ EC_MSG_CONFIG_XATTR_GET_FAIL,
|
|
|
3604df |
+ "Unable to get config xattr");
|
|
|
3604df |
+
|
|
|
3604df |
+ goto unlock;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ if (!ec_config_check(parent, &ctx->config)) {
|
|
|
3604df |
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
|
|
|
3604df |
+ EC_MSG_CONFIG_XATTR_INVALID,
|
|
|
3604df |
+ "Invalid config xattr");
|
|
|
3604df |
|
|
|
3604df |
- goto unlock;
|
|
|
3604df |
- }
|
|
|
3604df |
+ op_errno = EINVAL;
|
|
|
3604df |
|
|
|
3604df |
- ctx->have_config = _gf_true;
|
|
|
3604df |
- }
|
|
|
3604df |
+ goto unlock;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ ctx->have_config = _gf_true;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ ctx->have_info = _gf_true;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- ctx->have_info = _gf_true;
|
|
|
3604df |
-
|
|
|
3604df |
+ ec_set_dirty_flag (fop->data, ctx, ctx->dirty);
|
|
|
3604df |
op_errno = 0;
|
|
|
3604df |
-
|
|
|
3604df |
unlock:
|
|
|
3604df |
- lock->getting_size = _gf_false;
|
|
|
3604df |
+ lock->getting_xattr = _gf_false;
|
|
|
3604df |
|
|
|
3604df |
UNLOCK(&lock->loc.inode->lock);
|
|
|
3604df |
|
|
|
3604df |
@@ -1029,16 +1052,19 @@ void ec_get_size_version(ec_lock_link_t *link)
|
|
|
3604df |
ec_inode_t *ctx;
|
|
|
3604df |
ec_fop_data_t *fop;
|
|
|
3604df |
dict_t *dict = NULL;
|
|
|
3604df |
- int32_t error = -ENOMEM;
|
|
|
3604df |
- gf_boolean_t getting_size;
|
|
|
3604df |
+ int32_t error = 0;
|
|
|
3604df |
+ gf_boolean_t getting_xattr;
|
|
|
3604df |
+ gf_boolean_t set_dirty = _gf_false;
|
|
|
3604df |
uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
|
|
|
3604df |
-
|
|
|
3604df |
+ uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
|
|
|
3604df |
lock = link->lock;
|
|
|
3604df |
ctx = lock->ctx;
|
|
|
3604df |
fop = link->fop;
|
|
|
3604df |
|
|
|
3604df |
+ set_dirty = ec_set_dirty_flag (link, ctx, dirty);
|
|
|
3604df |
+
|
|
|
3604df |
/* If ec metadata has already been retrieved, do not try again. */
|
|
|
3604df |
- if (ctx->have_info) {
|
|
|
3604df |
+ if (ctx->have_info && (!set_dirty)) {
|
|
|
3604df |
if (ec_is_data_fop (fop->id)) {
|
|
|
3604df |
fop->healing |= lock->healing;
|
|
|
3604df |
}
|
|
|
3604df |
@@ -1047,58 +1073,63 @@ void ec_get_size_version(ec_lock_link_t *link)
|
|
|
3604df |
|
|
|
3604df |
/* Determine if there's something we need to retrieve for the current
|
|
|
3604df |
* operation. */
|
|
|
3604df |
- if (!lock->query &&
|
|
|
3604df |
+ if (!set_dirty && !lock->query &&
|
|
|
3604df |
(lock->loc.inode->ia_type != IA_IFREG) &&
|
|
|
3604df |
(lock->loc.inode->ia_type != IA_INVAL)) {
|
|
|
3604df |
- return;
|
|
|
3604df |
+ return;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
memset(&loc, 0, sizeof(loc));
|
|
|
3604df |
|
|
|
3604df |
LOCK(&lock->loc.inode->lock);
|
|
|
3604df |
|
|
|
3604df |
- getting_size = lock->getting_size;
|
|
|
3604df |
- lock->getting_size = _gf_true;
|
|
|
3604df |
- if (getting_size) {
|
|
|
3604df |
- fop->flags |= EC_FLAG_WAITING_SIZE;
|
|
|
3604df |
+ getting_xattr = lock->getting_xattr;
|
|
|
3604df |
+ lock->getting_xattr = _gf_true;
|
|
|
3604df |
+ if (getting_xattr) {
|
|
|
3604df |
+ fop->flags |= EC_FLAG_WAITING_XATTROP;
|
|
|
3604df |
|
|
|
3604df |
ec_sleep(fop);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
UNLOCK(&lock->loc.inode->lock);
|
|
|
3604df |
|
|
|
3604df |
- if (getting_size) {
|
|
|
3604df |
- error = 0;
|
|
|
3604df |
-
|
|
|
3604df |
+ if (getting_xattr) {
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
dict = dict_new();
|
|
|
3604df |
if (dict == NULL) {
|
|
|
3604df |
+ error = -ENOMEM;
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
+ if (lock->query && !ctx->have_info) {
|
|
|
3604df |
+ fop->flags |= EC_FLAG_QUERY_METADATA;
|
|
|
3604df |
+ /* Once we know that an xattrop will be needed,
|
|
|
3604df |
+ * we try to get all available information in a
|
|
|
3604df |
+ * single call. */
|
|
|
3604df |
+ error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
|
|
|
3604df |
+ EC_VERSION_SIZE);
|
|
|
3604df |
+ if (error != 0) {
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
|
|
|
3604df |
- /* Once we know that an xattrop will be needed, we try to get all available
|
|
|
3604df |
- * information in a single call. */
|
|
|
3604df |
- error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
|
|
|
3604df |
- EC_VERSION_SIZE);
|
|
|
3604df |
- if (error == 0) {
|
|
|
3604df |
- error = ec_dict_set_array(dict, EC_XATTR_DIRTY, allzero,
|
|
|
3604df |
- EC_VERSION_SIZE);
|
|
|
3604df |
- }
|
|
|
3604df |
- if (error != 0) {
|
|
|
3604df |
- goto out;
|
|
|
3604df |
+ if (lock->loc.inode->ia_type == IA_IFREG ||
|
|
|
3604df |
+ lock->loc.inode->ia_type == IA_INVAL) {
|
|
|
3604df |
+ error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
|
|
|
3604df |
+ if (error == 0) {
|
|
|
3604df |
+ error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
|
|
|
3604df |
+ }
|
|
|
3604df |
+ if (error != 0) {
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
}
|
|
|
3604df |
-
|
|
|
3604df |
- if (lock->loc.inode->ia_type == IA_IFREG ||
|
|
|
3604df |
- lock->loc.inode->ia_type == IA_INVAL) {
|
|
|
3604df |
- error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
|
|
|
3604df |
- if (error == 0) {
|
|
|
3604df |
- error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
|
|
|
3604df |
- }
|
|
|
3604df |
- if (error != 0) {
|
|
|
3604df |
- goto out;
|
|
|
3604df |
- }
|
|
|
3604df |
+ if (set_dirty) {
|
|
|
3604df |
+ error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty,
|
|
|
3604df |
+ EC_VERSION_SIZE);
|
|
|
3604df |
+ if (error != 0) {
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
fop->frame->root->uid = 0;
|
|
|
3604df |
@@ -1634,15 +1665,9 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
|
|
|
3604df |
if ((fop->error == 0) && (cbk != NULL) && (cbk->op_ret >= 0)) {
|
|
|
3604df |
if (link->update[0]) {
|
|
|
3604df |
ctx->post_version[0]++;
|
|
|
3604df |
- if (ec->node_mask & ~fop->good) {
|
|
|
3604df |
- ctx->dirty[0]++;
|
|
|
3604df |
- }
|
|
|
3604df |
}
|
|
|
3604df |
if (link->update[1]) {
|
|
|
3604df |
ctx->post_version[1]++;
|
|
|
3604df |
- if (ec->node_mask & ~fop->good) {
|
|
|
3604df |
- ctx->dirty[1]++;
|
|
|
3604df |
- }
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -1764,11 +1789,11 @@ void ec_unlock_lock(ec_lock_link_t *link)
|
|
|
3604df |
lock = link->lock;
|
|
|
3604df |
fop = link->fop;
|
|
|
3604df |
|
|
|
3604df |
+ lock->unlock_now = _gf_false;
|
|
|
3604df |
ec_clear_inode_info(fop, lock->loc.inode);
|
|
|
3604df |
|
|
|
3604df |
if ((lock->mask != 0) && lock->acquired) {
|
|
|
3604df |
ec_owner_set(fop->frame, lock);
|
|
|
3604df |
-
|
|
|
3604df |
lock->flock.l_type = F_UNLCK;
|
|
|
3604df |
ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
|
|
|
3604df |
lock->loc.inode);
|
|
|
3604df |
@@ -1791,15 +1816,16 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
|
|
|
3604df |
ec_lock_t *lock;
|
|
|
3604df |
ec_inode_t *ctx;
|
|
|
3604df |
|
|
|
3604df |
+ link = fop->data;
|
|
|
3604df |
+ lock = link->lock;
|
|
|
3604df |
+ ctx = lock->ctx;
|
|
|
3604df |
+
|
|
|
3604df |
if (op_ret < 0) {
|
|
|
3604df |
gf_msg(fop->xl->name, fop_log_level (fop->id, op_errno), op_errno,
|
|
|
3604df |
EC_MSG_SIZE_VERS_UPDATE_FAIL,
|
|
|
3604df |
"Failed to update version and size");
|
|
|
3604df |
} else {
|
|
|
3604df |
fop->parent->good &= fop->good;
|
|
|
3604df |
- link = fop->data;
|
|
|
3604df |
- lock = link->lock;
|
|
|
3604df |
- ctx = lock->ctx;
|
|
|
3604df |
|
|
|
3604df |
ec_lock_update_good(lock, fop);
|
|
|
3604df |
|
|
|
3604df |
@@ -1822,10 +1848,11 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
|
|
|
3604df |
|
|
|
3604df |
ctx->have_info = _gf_true;
|
|
|
3604df |
}
|
|
|
3604df |
-
|
|
|
3604df |
- if ((fop->parent->id != GF_FOP_FLUSH) &&
|
|
|
3604df |
- (fop->parent->id != GF_FOP_FSYNC) &&
|
|
|
3604df |
- (fop->parent->id != GF_FOP_FSYNCDIR)) {
|
|
|
3604df |
+ /* If we are here because of fop's and other than unlock request,
|
|
|
3604df |
+     * that means we are still holding a lock. That makes sure
|
|
|
3604df |
+ * lock->unlock_now can not be modified.
|
|
|
3604df |
+ */
|
|
|
3604df |
+ if (lock->unlock_now) {
|
|
|
3604df |
ec_unlock_lock(fop->data);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -1843,6 +1870,9 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
|
|
|
3604df |
int32_t err = -ENOMEM;
|
|
|
3604df |
|
|
|
3604df |
fop = link->fop;
|
|
|
3604df |
+ ec_t *ec = fop->xl->private;
|
|
|
3604df |
+ lock = link->lock;
|
|
|
3604df |
+ ctx = lock->ctx;
|
|
|
3604df |
|
|
|
3604df |
ec_trace("UPDATE", fop, "version=%ld/%ld, size=%ld, dirty=%ld/%ld",
|
|
|
3604df |
version[0], version[1], size, dirty[0], dirty[1]);
|
|
|
3604df |
@@ -1852,9 +1882,6 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
|
|
|
3604df |
goto out;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- lock = link->lock;
|
|
|
3604df |
- ctx = lock->ctx;
|
|
|
3604df |
-
|
|
|
3604df |
/* If we don't have version information or it has been modified, we
|
|
|
3604df |
* update it. */
|
|
|
3604df |
if (!ctx->have_version || (version[0] != 0) || (version[1] != 0)) {
|
|
|
3604df |
@@ -1866,8 +1893,8 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
if (size != 0) {
|
|
|
3604df |
- /* If size has been changed, we should already know the previous size
|
|
|
3604df |
- * of the file. */
|
|
|
3604df |
+ /* If size has been changed, we should already
|
|
|
3604df |
+ * know the previous size of the file. */
|
|
|
3604df |
GF_ASSERT(ctx->have_size);
|
|
|
3604df |
|
|
|
3604df |
err = ec_dict_set_number(dict, EC_XATTR_SIZE, size);
|
|
|
3604df |
@@ -1876,13 +1903,12 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
|
|
|
3604df |
}
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
- /* If we don't have dirty information or it has been modified, we update
|
|
|
3604df |
- * it. */
|
|
|
3604df |
- if ((dirty[0] != 0) || (dirty[1] != 0)) {
|
|
|
3604df |
- err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
|
|
|
3604df |
- if (err != 0) {
|
|
|
3604df |
- goto out;
|
|
|
3604df |
- }
|
|
|
3604df |
+ if (dirty[0] || dirty[1]) {
|
|
|
3604df |
+ err = ec_dict_set_array(dict, EC_XATTR_DIRTY,
|
|
|
3604df |
+ dirty, EC_VERSION_SIZE);
|
|
|
3604df |
+ if (err != 0) {
|
|
|
3604df |
+ goto out;
|
|
|
3604df |
+ }
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
/* If config information is not known, we request it now. */
|
|
|
3604df |
@@ -1922,9 +1948,7 @@ out:
|
|
|
3604df |
gf_msg (fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
|
|
|
3604df |
"Unable to update version and size");
|
|
|
3604df |
|
|
|
3604df |
- if ((fop->parent->id != GF_FOP_FLUSH) &&
|
|
|
3604df |
- (fop->parent->id != GF_FOP_FSYNC) &&
|
|
|
3604df |
- (fop->parent->id != GF_FOP_FSYNCDIR)) {
|
|
|
3604df |
+ if (lock->unlock_now) {
|
|
|
3604df |
ec_unlock_lock(fop->data);
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -1935,28 +1959,36 @@ ec_update_info(ec_lock_link_t *link)
|
|
|
3604df |
{
|
|
|
3604df |
ec_lock_t *lock;
|
|
|
3604df |
ec_inode_t *ctx;
|
|
|
3604df |
- uint64_t version[2];
|
|
|
3604df |
- uint64_t dirty[2];
|
|
|
3604df |
+ uint64_t version[2] = {0, 0};
|
|
|
3604df |
+ uint64_t dirty[2] = {0, 0};
|
|
|
3604df |
uint64_t size;
|
|
|
3604df |
+ ec_t *ec = NULL;
|
|
|
3604df |
|
|
|
3604df |
lock = link->lock;
|
|
|
3604df |
ctx = lock->ctx;
|
|
|
3604df |
+ ec = link->fop->xl->private;
|
|
|
3604df |
|
|
|
3604df |
/* pre_version[*] will be 0 if have_version is false */
|
|
|
3604df |
version[0] = ctx->post_version[0] - ctx->pre_version[0];
|
|
|
3604df |
version[1] = ctx->post_version[1] - ctx->pre_version[1];
|
|
|
3604df |
|
|
|
3604df |
size = ctx->post_size - ctx->pre_size;
|
|
|
3604df |
-
|
|
|
3604df |
- dirty[0] = ctx->dirty[0];
|
|
|
3604df |
- dirty[1] = ctx->dirty[1];
|
|
|
3604df |
- /*Dirty is not combined so just reset it right here*/
|
|
|
3604df |
- memset(ctx->dirty, 0, sizeof(ctx->dirty));
|
|
|
3604df |
-
|
|
|
3604df |
+ /* If we set the dirty flag for update fop, we have to unset it.
|
|
|
3604df |
+ * If fop has failed on some bricks, leave the dirty as marked. */
|
|
|
3604df |
+ if (lock->unlock_now) {
|
|
|
3604df |
+ if (!(ec->node_mask & ~lock->good_mask)) {
|
|
|
3604df |
+ if (ctx->dirty[0] != 0) {
|
|
|
3604df |
+ dirty[0] = -1;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ if (ctx->dirty[1] != 0) {
|
|
|
3604df |
+ dirty[1] = -1;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ }
|
|
|
3604df |
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
|
|
|
3604df |
+ }
|
|
|
3604df |
if ((version[0] != 0) || (version[1] != 0) ||
|
|
|
3604df |
(dirty[0] != 0) || (dirty[1] != 0)) {
|
|
|
3604df |
ec_update_size_version(link, version, size, dirty);
|
|
|
3604df |
-
|
|
|
3604df |
return _gf_true;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -1966,7 +1998,15 @@ ec_update_info(ec_lock_link_t *link)
|
|
|
3604df |
void
|
|
|
3604df |
ec_unlock_now(ec_lock_link_t *link)
|
|
|
3604df |
{
|
|
|
3604df |
+ ec_lock_t *lock;
|
|
|
3604df |
+ lock = link->lock;
|
|
|
3604df |
+
|
|
|
3604df |
ec_trace("UNLOCK_NOW", link->fop, "lock=%p", link->lock);
|
|
|
3604df |
+ /*At this point, lock is not being used by any fop and
|
|
|
3604df |
+ *can not be reused by any fop as it is going to be released.
|
|
|
3604df |
+ *lock->unlock_now can not be modified at any other place.
|
|
|
3604df |
+ */
|
|
|
3604df |
+ lock->unlock_now = _gf_true;
|
|
|
3604df |
|
|
|
3604df |
if (!ec_update_info(link)) {
|
|
|
3604df |
ec_unlock_lock(link);
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
|
|
|
3604df |
index 8e724a8..d720d24 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-common.h
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-common.h
|
|
|
3604df |
@@ -28,7 +28,8 @@ typedef enum {
|
|
|
3604df |
#define EC_CONFIG_ALGORITHM 0
|
|
|
3604df |
|
|
|
3604df |
#define EC_FLAG_LOCK_SHARED 0x0001
|
|
|
3604df |
-#define EC_FLAG_WAITING_SIZE 0x0002
|
|
|
3604df |
+#define EC_FLAG_WAITING_XATTROP 0x0002
|
|
|
3604df |
+#define EC_FLAG_QUERY_METADATA 0x0004
|
|
|
3604df |
|
|
|
3604df |
#define EC_SELFHEAL_BIT 62
|
|
|
3604df |
|
|
|
3604df |
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
|
|
|
3604df |
index 4a2a11f..a207b11 100644
|
|
|
3604df |
--- a/xlators/cluster/ec/src/ec-data.h
|
|
|
3604df |
+++ b/xlators/cluster/ec/src/ec-data.h
|
|
|
3604df |
@@ -163,7 +163,8 @@ struct _ec_lock
|
|
|
3604df |
uint32_t refs_owners; /* Refs for fops owning the lock */
|
|
|
3604df |
uint32_t refs_pending; /* Refs assigned to fops being prepared */
|
|
|
3604df |
gf_boolean_t acquired;
|
|
|
3604df |
- gf_boolean_t getting_size;
|
|
|
3604df |
+ gf_boolean_t getting_xattr;
|
|
|
3604df |
+ gf_boolean_t unlock_now;
|
|
|
3604df |
gf_boolean_t release;
|
|
|
3604df |
gf_boolean_t query;
|
|
|
3604df |
fd_t *fd;
|
|
|
3604df |
--
|
|
|
3604df |
1.7.1
|
|
|
3604df |
|