From 07af401ec6a375203d9c842b6436c3b248230900 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Fri, 16 Sep 2016 17:02:08 +0530
Subject: [PATCH 57/86] afr: add replication events
Patch in master: http://review.gluster.org/#/c/15349/
Patch in release-3.9: http://review.gluster.org/#/c/15417/
Added the following events for the eventing framework:
"EVENT_AFR_QUORUM_MET" --> Sent when quorum is met.
"EVENT_AFR_QUORUM_FAIL" --> Sent when quorum is lost.
"EVENT_AFR_SUBVOL_UP" --> Sent when afr witnesses the first up subvolume.
"EVENT_AFR_SUBVOLS_DOWN" --> Sent when all children of an afr subvol are down.
"EVENT_AFR_SPLIT_BRAIN" --> Sent when self-heal detects split-brain in the heal
path (not the read/write path).
Change-Id: I94828c57dc343062392e65ade0e1a6f93e3d152f
BUG: 1361082
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/84802
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/afr/src/afr-common.c | 16 +++++++-
xlators/cluster/afr/src/afr-self-heal-common.c | 8 ++++
xlators/cluster/afr/src/afr-self-heal-entry.c | 27 ++++++++++++--
xlators/cluster/afr/src/afr-self-heal-name.c | 47 +++++++++++++++++++-----
4 files changed, 82 insertions(+), 16 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9b2c0d7..8f0de59 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -31,6 +31,7 @@
#include "byte-order.h"
#include "statedump.h"
#include "inode.h"
+#include "events.h"
#include "fd.h"
@@ -4292,6 +4293,9 @@ afr_notify (xlator_t *this, int32_t event,
AFR_MSG_SUBVOL_UP,
"Subvolume '%s' came back up; "
"going online.", ((xlator_t *)data)->name);
+ gf_event (EVENT_AFR_SUBVOL_UP,
+ "subvol=%s", this->name);
+
} else {
event = GF_EVENT_CHILD_MODIFIED;
}
@@ -4314,6 +4318,8 @@ afr_notify (xlator_t *this, int32_t event,
AFR_MSG_ALL_SUBVOLS_DOWN,
"All subvolumes are down. Going offline "
"until atleast one of them comes back up.");
+ gf_event (EVENT_AFR_SUBVOLS_DOWN,
+ "subvol=%s", this->name);
} else {
event = GF_EVENT_SOME_CHILD_DOWN;
}
@@ -4365,13 +4371,19 @@ afr_notify (xlator_t *this, int32_t event,
if (priv->quorum_count) {
has_quorum = afr_has_quorum (priv->child_up, this);
- if (!had_quorum && has_quorum)
+ if (!had_quorum && has_quorum) {
gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
"Client-quorum is met");
- if (had_quorum && !has_quorum)
+ gf_event (EVENT_AFR_QUORUM_MET,
+ "subvol=%s", this->name);
+ }
+ if (had_quorum && !has_quorum) {
gf_msg (this->name, GF_LOG_WARNING, 0,
AFR_MSG_QUORUM_FAIL,
"Client-quorum is not met");
+ gf_event (EVENT_AFR_QUORUM_FAIL, "subvol=%s",
+ this->name);
+ }
}
/* if all subvols have reported status, no need to hide anything
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 58db6d1..d7ffb35 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -14,6 +14,7 @@
#include "byte-order.h"
#include "protocol-common.h"
#include "afr-messages.h"
+#include "events.h"
void
afr_heal_synctask (xlator_t *this, afr_local_t *local);
@@ -1653,6 +1654,13 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
(int) replies[i].poststat.ia_type,
priv->children[i]->name,
uuid_utoa (replies[i].poststat.ia_gfid));
+ gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;"
+ "msg=file type mismatch;gfid=%s;"
+ "ia_type-%d=%s;ia_type-%d=%s",
+ this->name,
+ uuid_utoa (replies[i].poststat.ia_gfid), first,
+ gf_inode_type_to_str (first.ia_type), i,
+ gf_inode_type_to_str (replies[i].poststat.ia_type));
ret = -EIO;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 985cebe..bf55ede 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -15,6 +15,7 @@
#include "afr-transaction.h"
#include "afr-messages.h"
#include "syncop-utils.h"
+#include "events.h"
/* Max file name length is 255 this filename is of length 256. No file with
* this name can ever come, entry-lock with this name is going to prevent
@@ -240,13 +241,22 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
replies[i].poststat.ia_gfid)) {
gf_msg (this->name, GF_LOG_ERROR, 0,
AFR_MSG_SPLIT_BRAIN, "Gfid mismatch "
- "detected for <%s/%s>, %s on %s and %s on %s. "
+ "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
"Skipping conservative merge on the file.",
uuid_utoa (pargfid), bname,
uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
priv->children[i]->name,
uuid_utoa_r (replies[src_idx].poststat.ia_gfid,
g2), priv->children[src_idx]->name);
+ gf_event (EVENT_AFR_SPLIT_BRAIN,
+ "subvol=%s;msg=gfid mismatch. Skipping "
+ "conservative merge.;file=<gfid:%s>/%s>;count=2;"
+ "child-%d=%s;gfid-%d=%s;child-%d=%s;gfid-%d=%s",
+ this->name, uuid_utoa (pargfid), bname, i,
+ priv->children[i]->name, i,
+ uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+ src_idx, priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2));
return -1;
}
@@ -254,13 +264,22 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
(replies[i].poststat.ia_type)) {
gf_msg (this->name, GF_LOG_ERROR, 0,
AFR_MSG_SPLIT_BRAIN, "Type mismatch "
- "detected for <%s/%s>, %d on %s and %d on %s. "
+ "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
"Skipping conservative merge on the file.",
uuid_utoa (pargfid), bname,
- replies[i].poststat.ia_type,
+ gf_inode_type_to_str (replies[i].poststat.ia_type),
priv->children[i]->name,
- replies[src_idx].poststat.ia_type,
+ gf_inode_type_to_str (replies[src_idx].poststat.ia_type),
priv->children[src_idx]->name);
+ gf_event (EVENT_AFR_SPLIT_BRAIN,
+ "subvol=%s;msg=file type mismatch. Skipping "
+ "conservative merge;file=<gfid:%s>/%s>;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;type-%d=%s",
+ this->name, uuid_utoa (pargfid), bname, i,
+ priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type),
+ src_idx, priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
return -1;
}
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 3445ecc..acacea8 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -9,6 +9,7 @@
*/
+#include "events.h"
#include "afr.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
@@ -274,6 +275,7 @@ afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies
int i = 0;
int type_idx = -1;
ia_type_t inode_type = IA_INVAL;
+ ia_type_t inode_type1 = IA_INVAL;
afr_private_t *priv = NULL;
priv = this->private;
@@ -290,21 +292,32 @@ afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies
type_idx = i;
continue;
}
-
+ inode_type1 = replies[i].poststat.ia_type;
if (sources[i] || source == -1) {
if ((sources[type_idx] || source == -1) &&
- (inode_type != replies[i].poststat.ia_type)) {
+ (inode_type != inode_type1)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
AFR_MSG_SPLIT_BRAIN,
"Type mismatch for <gfid:%s>/%s: "
- "%d on %s and %d on %s",
+ "%s on %s and %s on %s",
uuid_utoa(pargfid), bname,
- replies[i].poststat.ia_type,
+ gf_inode_type_to_str (inode_type1),
priv->children[i]->name,
- replies[type_idx].poststat.ia_type,
+ gf_inode_type_to_str (inode_type),
priv->children[type_idx]->name);
-
- return -EIO;
+ gf_event (EVENT_AFR_SPLIT_BRAIN,
+ "subvol=%s;msg=file type mismatch;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;"
+ "type-%d=%s", this->name,
+ uuid_utoa (pargfid), bname, i,
+ priv->children[i]->name, i,
+ gf_inode_type_to_str (inode_type1),
+ type_idx,
+ priv->children[type_idx]->name,
+ type_idx,
+ gf_inode_type_to_str (inode_type));
+ return -EIO;
}
inode_type = replies[i].poststat.ia_type;
type_idx = i;
@@ -322,6 +335,7 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies
int i = 0;
int gfid_idx_iter = -1;
void *gfid = NULL;
+ void *gfid1 = NULL;
afr_private_t *priv = NULL;
char g1[64], g2[64];
@@ -340,18 +354,31 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies
continue;
}
+ gfid1 = &replies[i].poststat.ia_gfid;
if (sources[i] || source == -1) {
if ((sources[gfid_idx_iter] || source == -1) &&
- gf_uuid_compare (gfid, replies[i].poststat.ia_gfid)) {
+ gf_uuid_compare (gfid, gfid1)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
AFR_MSG_SPLIT_BRAIN,
"GFID mismatch for <gfid:%s>/%s "
"%s on %s and %s on %s",
uuid_utoa (pargfid), bname,
- uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+ uuid_utoa_r (gfid1, g1),
priv->children[i]->name,
- uuid_utoa_r (replies[gfid_idx_iter].poststat.ia_gfid, g2),
+ uuid_utoa_r (gfid, g2),
priv->children[gfid_idx_iter]->name);
+ gf_event (EVENT_AFR_SPLIT_BRAIN,
+ "subvol=%s;msg=gfid mismatch;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;gfid-%d=%s;child-%d=%s;"
+ "gfid-%d=%s", this->name,
+ uuid_utoa (pargfid), bname, i,
+ priv->children[i]->name, i,
+ uuid_utoa_r (gfid1, g1),
+ gfid_idx_iter,
+ priv->children[gfid_idx_iter]->name,
+ gfid_idx_iter,
+ uuid_utoa_r (gfid, g2));
return -EIO;
}
--
1.7.1