Blob Blame History Raw
From 07af401ec6a375203d9c842b6436c3b248230900 Mon Sep 17 00:00:00 2001
From: Ravishankar N <ravishankar@redhat.com>
Date: Fri, 16 Sep 2016 17:02:08 +0530
Subject: [PATCH 57/86] afr: add replication events

Patch in master: http://review.gluster.org/#/c/15349/
Patch in release-3.9: http://review.gluster.org/#/c/15417/

Added the following events for the eventing framework:

  "EVENT_AFR_QUORUM_MET"   --> Sent when quorum is met.
  "EVENT_AFR_QUORUM_FAIL"  --> Sent when quorum is lost.
  "EVENT_AFR_SUBVOL_UP"    --> Sent when afr witnesses the first up subvolume.
  "EVENT_AFR_SUBVOLS_DOWN" --> Sent when all children of an afr subvol are down.
  "EVENT_AFR_SPLIT_BRAIN"  --> Sent when self-heal detects split-brain in the
                               heal path (not in the read/write path).

Change-Id: I94828c57dc343062392e65ade0e1a6f93e3d152f
BUG: 1361082
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/84802
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Tested-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/cluster/afr/src/afr-common.c           |   16 +++++++-
 xlators/cluster/afr/src/afr-self-heal-common.c |    8 ++++
 xlators/cluster/afr/src/afr-self-heal-entry.c  |   27 ++++++++++++--
 xlators/cluster/afr/src/afr-self-heal-name.c   |   47 +++++++++++++++++++-----
 4 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 9b2c0d7..8f0de59 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -31,6 +31,7 @@
 #include "byte-order.h"
 #include "statedump.h"
 #include "inode.h"
+#include "events.h"
 
 #include "fd.h"
 
@@ -4292,6 +4293,9 @@ afr_notify (xlator_t *this, int32_t event,
                                         AFR_MSG_SUBVOL_UP,
                                         "Subvolume '%s' came back up; "
                                      "going online.", ((xlator_t *)data)->name);
+                                gf_event (EVENT_AFR_SUBVOL_UP,
+                                          "subvol=%s", this->name);
+
                         } else {
                                 event = GF_EVENT_CHILD_MODIFIED;
                         }
@@ -4314,6 +4318,8 @@ afr_notify (xlator_t *this, int32_t event,
                                         AFR_MSG_ALL_SUBVOLS_DOWN,
                                        "All subvolumes are down. Going offline "
                                     "until atleast one of them comes back up.");
+                                gf_event (EVENT_AFR_SUBVOLS_DOWN,
+                                          "subvol=%s", this->name);
                         } else {
                                 event = GF_EVENT_SOME_CHILD_DOWN;
                         }
@@ -4365,13 +4371,19 @@ afr_notify (xlator_t *this, int32_t event,
 
         if (priv->quorum_count) {
                 has_quorum = afr_has_quorum (priv->child_up, this);
-                if (!had_quorum && has_quorum)
+                if (!had_quorum && has_quorum) {
                         gf_msg (this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
                                 "Client-quorum is met");
-                if (had_quorum && !has_quorum)
+                        gf_event  (EVENT_AFR_QUORUM_MET,
+                                   "subvol=%s", this->name);
+                }
+                if (had_quorum && !has_quorum) {
                         gf_msg (this->name, GF_LOG_WARNING, 0,
                                 AFR_MSG_QUORUM_FAIL,
                                 "Client-quorum is not met");
+                        gf_event  (EVENT_AFR_QUORUM_FAIL, "subvol=%s",
+                                   this->name);
+                }
         }
 
         /* if all subvols have reported status, no need to hide anything
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 58db6d1..d7ffb35 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -14,6 +14,7 @@
 #include "byte-order.h"
 #include "protocol-common.h"
 #include "afr-messages.h"
+#include "events.h"
 
 void
 afr_heal_synctask (xlator_t *this, afr_local_t *local);
@@ -1653,6 +1654,13 @@ afr_selfheal_unlocked_inspect (call_frame_t *frame, xlator_t *this,
 				(int) replies[i].poststat.ia_type,
 				priv->children[i]->name,
 				uuid_utoa (replies[i].poststat.ia_gfid));
+                        gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;"
+                                "msg=file type mismatch;gfid=%s;"
+                                "ia_type-%d=%s;ia_type-%d=%s",
+                                this->name,
+                                uuid_utoa (replies[i].poststat.ia_gfid), first,
+                                gf_inode_type_to_str (first.ia_type), i,
+                            gf_inode_type_to_str (replies[i].poststat.ia_type));
                         ret = -EIO;
                         goto out;
 		}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 985cebe..bf55ede 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -15,6 +15,7 @@
 #include "afr-transaction.h"
 #include "afr-messages.h"
 #include "syncop-utils.h"
+#include "events.h"
 
 /* Max file name length is 255 this filename is of length 256. No file with
  * this name can ever come, entry-lock with this name is going to prevent
@@ -240,13 +241,22 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
                                   replies[i].poststat.ia_gfid)) {
                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                 AFR_MSG_SPLIT_BRAIN, "Gfid mismatch "
-                                "detected for <%s/%s>, %s on %s and %s on %s. "
+                                "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
                                 "Skipping conservative merge on the file.",
                                 uuid_utoa (pargfid), bname,
                                 uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
                                 priv->children[i]->name,
                                 uuid_utoa_r (replies[src_idx].poststat.ia_gfid,
                                 g2), priv->children[src_idx]->name);
+                        gf_event (EVENT_AFR_SPLIT_BRAIN,
+                                 "subvol=%s;msg=gfid mismatch. Skipping "
+                                 "conservative merge.;file=<gfid:%s>/%s>;count=2;"
+                                 "child-%d=%s;gfid-%d=%s;child-%d=%s;gfid-%d=%s",
+                                 this->name, uuid_utoa (pargfid), bname, i,
+                                 priv->children[i]->name, i,
+                                 uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+                                src_idx, priv->children[src_idx]->name, src_idx,
+                           uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2));
                         return -1;
                 }
 
@@ -254,13 +264,22 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this,
                     (replies[i].poststat.ia_type)) {
                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                 AFR_MSG_SPLIT_BRAIN, "Type mismatch "
-                                "detected for <%s/%s>, %d on %s and %d on %s. "
+                                "detected for <gfid:%s>/%s>, %s on %s and %s on %s. "
                                 "Skipping conservative merge on the file.",
                                 uuid_utoa (pargfid), bname,
-                                replies[i].poststat.ia_type,
+                             gf_inode_type_to_str (replies[i].poststat.ia_type),
                                 priv->children[i]->name,
-                                replies[src_idx].poststat.ia_type,
+                       gf_inode_type_to_str (replies[src_idx].poststat.ia_type),
                                 priv->children[src_idx]->name);
+                        gf_event (EVENT_AFR_SPLIT_BRAIN,
+                                 "subvol=%s;msg=file type mismatch. Skipping "
+                                 "conservative merge;file=<gfid:%s>/%s>;count=2;"
+                                 "child-%d=%s;type-%d=%s;child-%d=%s;type-%d=%s",
+                                 this->name, uuid_utoa (pargfid), bname, i,
+                                 priv->children[i]->name, i,
+                              gf_inode_type_to_str(replies[i].poststat.ia_type),
+                                src_idx, priv->children[src_idx]->name, src_idx,
+                       gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
                         return -1;
                 }
         }
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
index 3445ecc..acacea8 100644
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -9,6 +9,7 @@
 */
 
 
+#include "events.h"
 #include "afr.h"
 #include "afr-self-heal.h"
 #include "afr-messages.h"
@@ -274,6 +275,7 @@ afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies
         int             i           = 0;
         int             type_idx    = -1;
         ia_type_t       inode_type  = IA_INVAL;
+        ia_type_t       inode_type1 = IA_INVAL;
         afr_private_t  *priv        = NULL;
 
         priv = this->private;
@@ -290,21 +292,32 @@ afr_selfheal_name_type_mismatch_check (xlator_t *this, struct afr_reply *replies
                         type_idx = i;
                         continue;
                 }
-
+                inode_type1 = replies[i].poststat.ia_type;
                 if (sources[i] || source == -1) {
                         if ((sources[type_idx] || source == -1) &&
-                            (inode_type != replies[i].poststat.ia_type)) {
+                            (inode_type != inode_type1)) {
                                 gf_msg (this->name, GF_LOG_WARNING, 0,
                                         AFR_MSG_SPLIT_BRAIN,
                                         "Type mismatch for <gfid:%s>/%s: "
-                                        "%d on %s and %d on %s",
+                                        "%s on %s and %s on %s",
                                         uuid_utoa(pargfid), bname,
-                                        replies[i].poststat.ia_type,
+                                        gf_inode_type_to_str (inode_type1),
                                         priv->children[i]->name,
-                                        replies[type_idx].poststat.ia_type,
+                                        gf_inode_type_to_str (inode_type),
                                         priv->children[type_idx]->name);
-
-                                    return -EIO;
+                                gf_event (EVENT_AFR_SPLIT_BRAIN,
+                                         "subvol=%s;msg=file type mismatch;"
+                                         "file=<gfid:%s>/%s;count=2;"
+                                         "child-%d=%s;type-%d=%s;child-%d=%s;"
+                                         "type-%d=%s", this->name,
+                                         uuid_utoa (pargfid), bname, i,
+                                         priv->children[i]->name, i,
+                                         gf_inode_type_to_str (inode_type1),
+                                         type_idx,
+                                         priv->children[type_idx]->name,
+                                         type_idx,
+                                         gf_inode_type_to_str (inode_type));
+                                return -EIO;
                         }
                         inode_type = replies[i].poststat.ia_type;
                         type_idx = i;
@@ -322,6 +335,7 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies
         int             i             = 0;
 	int             gfid_idx_iter = -1;
         void           *gfid          = NULL;
+        void           *gfid1         = NULL;
         afr_private_t  *priv          = NULL;
 	char g1[64], g2[64];
 
@@ -340,18 +354,31 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies
 			continue;
 		}
 
+                gfid1 = &replies[i].poststat.ia_gfid;
 		if (sources[i] || source == -1) {
 			if ((sources[gfid_idx_iter] || source == -1) &&
-			    gf_uuid_compare (gfid, replies[i].poststat.ia_gfid)) {
+			    gf_uuid_compare (gfid, gfid1)) {
 			        gf_msg (this->name, GF_LOG_WARNING, 0,
                                         AFR_MSG_SPLIT_BRAIN,
 					"GFID mismatch for <gfid:%s>/%s "
 					"%s on %s and %s on %s",
 					uuid_utoa (pargfid), bname,
-					uuid_utoa_r (replies[i].poststat.ia_gfid, g1),
+					uuid_utoa_r (gfid1, g1),
 					priv->children[i]->name,
-					uuid_utoa_r (replies[gfid_idx_iter].poststat.ia_gfid, g2),
+					uuid_utoa_r (gfid, g2),
 					priv->children[gfid_idx_iter]->name);
+                                gf_event (EVENT_AFR_SPLIT_BRAIN,
+                                        "subvol=%s;msg=gfid mismatch;"
+                                        "file=<gfid:%s>/%s;count=2;"
+                                        "child-%d=%s;gfid-%d=%s;child-%d=%s;"
+                                        "gfid-%d=%s", this->name,
+                                        uuid_utoa (pargfid), bname, i,
+                                        priv->children[i]->name, i,
+                                        uuid_utoa_r (gfid1, g1),
+                                        gfid_idx_iter,
+                                        priv->children[gfid_idx_iter]->name,
+                                        gfid_idx_iter,
+                                        uuid_utoa_r (gfid, g2));
 
 				return -EIO;
 			}
-- 
1.7.1