7f4c2a
From 37535f45135b5f6f9b62c8e011b96fc0b90743f7 Mon Sep 17 00:00:00 2001
7f4c2a
From: Ravishankar N <ravishankar@redhat.com>
7f4c2a
Date: Fri, 26 Jun 2015 17:29:20 +0530
7f4c2a
Subject: [PATCH 148/190] afr: Block fops when file is in split-brain
7f4c2a
7f4c2a
Patch in master: http://review.gluster.org/#/c/11371/
7f4c2a
Patch in release-3.7: http://review.gluster.org/#/c/11420/
7f4c2a
7f4c2a
For directories, block metadata FOPs when the directory is in metadata split-brain.
7f4c2a
For non-directories, block both data and metadata FOPs when the file is in data or metadata split-brain.
7f4c2a
Do not block entry FOPs (entry and entry-rename transactions skip the split-brain check).
7f4c2a
7f4c2a
Change-Id: I5d44bd8ece08ec683f930d797541eae719257857
7f4c2a
BUG: 1223738
7f4c2a
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/51691
7f4c2a
---
7f4c2a
 tests/bugs/glusterfs/bug-873962.t         |    8 ++--
7f4c2a
 xlators/cluster/afr/src/afr-common.c      |   58 +++++++++++++++++++++++++++++
7f4c2a
 xlators/cluster/afr/src/afr-read-txn.c    |   22 ++++-------
7f4c2a
 xlators/cluster/afr/src/afr-transaction.c |    7 +++
7f4c2a
 xlators/cluster/afr/src/afr.h             |    3 +
7f4c2a
 5 files changed, 80 insertions(+), 18 deletions(-)
7f4c2a
7f4c2a
diff --git a/tests/bugs/glusterfs/bug-873962.t b/tests/bugs/glusterfs/bug-873962.t
7f4c2a
index 492d028..7faa999 100755
7f4c2a
--- a/tests/bugs/glusterfs/bug-873962.t
7f4c2a
+++ b/tests/bugs/glusterfs/bug-873962.t
7f4c2a
@@ -65,8 +65,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $
7f4c2a
 #Files are in split-brain, so open should fail
7f4c2a
 TEST ! cat $M0/a;
7f4c2a
 TEST ! cat $M1/a;
7f4c2a
-TEST cat $M0/b;
7f4c2a
-TEST cat $M1/b;
7f4c2a
+TEST ! cat $M0/b;
7f4c2a
+TEST ! cat $M1/b;
7f4c2a
 
7f4c2a
 #Reset split-brain status
7f4c2a
 TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a;
7f4c2a
@@ -92,8 +92,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $
7f4c2a
 #Files are in split-brain, so open should fail
7f4c2a
 TEST ! cat $M0/c
7f4c2a
 TEST ! cat $M1/c
7f4c2a
-TEST cat $M0/d
7f4c2a
-TEST cat $M1/d
7f4c2a
+TEST ! cat $M0/d
7f4c2a
+TEST ! cat $M1/d
7f4c2a
 
7f4c2a
 TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c
7f4c2a
 TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
7f4c2a
index 7150f0f..9129b27 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-common.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-common.c
7f4c2a
@@ -341,6 +341,58 @@ out:
7f4c2a
 }
7f4c2a
 
7f4c2a
 int
7f4c2a
+afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
7f4c2a
+                       unsigned char *readable, int *event_p, int type)
7f4c2a
+{
7f4c2a
+
7f4c2a
+        afr_private_t *priv = this->private;
7f4c2a
+        afr_local_t *local = frame->local;
7f4c2a
+        unsigned char *data = alloca0 (priv->child_count);
7f4c2a
+        unsigned char *metadata = alloca0 (priv->child_count);
7f4c2a
+        int data_count = 0;
7f4c2a
+        int metadata_count = 0;
7f4c2a
+        int event_generation = 0;
7f4c2a
+        int ret = 0;
7f4c2a
+
7f4c2a
+        /* We don't care about split-brains for entry transactions. */
7f4c2a
+        if (type == AFR_ENTRY_TRANSACTION || type == AFR_ENTRY_RENAME_TRANSACTION)
7f4c2a
+                return 0;
7f4c2a
+
7f4c2a
+        ret = afr_inode_read_subvol_get (inode, this, data, metadata,
7f4c2a
+                                         &event_generation);
7f4c2a
+        if (ret == -1)
7f4c2a
+                return -EIO;
7f4c2a
+
7f4c2a
+        data_count = AFR_COUNT (data, priv->child_count);
7f4c2a
+        metadata_count = AFR_COUNT (metadata, priv->child_count);
7f4c2a
+
7f4c2a
+        if (inode->ia_type == IA_IFDIR) {
7f4c2a
+                /* For directories, allow even if it is in data split-brain. */
7f4c2a
+                if (type == AFR_METADATA_TRANSACTION) {
7f4c2a
+                        if (!metadata_count)
7f4c2a
+                                return -EIO;
7f4c2a
+                }
7f4c2a
+        } else {
7f4c2a
+                /* For files, abort in case of data/metadata split-brain. */
7f4c2a
+                if (!data_count || !metadata_count)
7f4c2a
+                        return -EIO;
7f4c2a
+        }
7f4c2a
+
7f4c2a
+        if (type == AFR_METADATA_TRANSACTION && readable)
7f4c2a
+                memcpy (readable, metadata, priv->child_count * sizeof *metadata);
7f4c2a
+        if (type == AFR_DATA_TRANSACTION && readable) {
7f4c2a
+                if (!data_count)
7f4c2a
+                        memcpy (readable, local->child_up,
7f4c2a
+                                priv->child_count * sizeof *readable);
7f4c2a
+                else
7f4c2a
+                        memcpy (readable, data, priv->child_count * sizeof *data);
7f4c2a
+        }
7f4c2a
+        if (event_p)
7f4c2a
+                *event_p = event_generation;
7f4c2a
+        return 0;
7f4c2a
+}
7f4c2a
+
7f4c2a
+int
7f4c2a
 afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
7f4c2a
                                   int *spb_choice)
7f4c2a
 {
7f4c2a
@@ -598,6 +650,8 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
7f4c2a
 	for (i = 0; i < priv->child_count; i++) {
7f4c2a
 		if (data_accused[i])
7f4c2a
 			continue;
7f4c2a
+                if ((priv->arbiter_count == 1) && (i == ARBITER_BRICK_INDEX))
7f4c2a
+                        continue;
7f4c2a
 		if (replies[i].poststat.ia_size < maxsize)
7f4c2a
 			data_accused[i] = 1;
7f4c2a
 	}
7f4c2a
@@ -1682,6 +1736,10 @@ afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
7f4c2a
          * the slowest local subvolume is far preferable to a remote one.
7f4c2a
          */
7f4c2a
         if (is_local) {
7f4c2a
+                /* Don't set arbiter as read child. */
7f4c2a
+                if ((priv->arbiter_count == 1) &&
7f4c2a
+                    (child_index == ARBITER_BRICK_INDEX))
7f4c2a
+                        goto out;
7f4c2a
                 gf_log (this->name, GF_LOG_INFO,
7f4c2a
                         "selecting local read_child %s",
7f4c2a
                         priv->children[child_index]->name);
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
7f4c2a
index 6121108..6e54549 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-read-txn.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-read-txn.c
7f4c2a
@@ -52,6 +52,9 @@ afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
7f4c2a
                 local->op_ret = ret;                              \
7f4c2a
                 local->op_errno = errnum;                          \
7f4c2a
                 read_subvol = index;                              \
7f4c2a
+                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,\
7f4c2a
+                        "Failing %s on gfid %s: split-brain observed.",\
7f4c2a
+                        gf_fop_list[local->op], uuid_utoa (inode->gfid));\
7f4c2a
                 goto label;                                       \
7f4c2a
         } while (0)
7f4c2a
 
7f4c2a
@@ -59,7 +62,6 @@ int
7f4c2a
 afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
7f4c2a
 {
7f4c2a
 	afr_local_t *local = NULL;
7f4c2a
-        afr_private_t *priv = NULL;
7f4c2a
 	int read_subvol = 0;
7f4c2a
 	int event_generation = 0;
7f4c2a
 	inode_t *inode = NULL;
7f4c2a
@@ -68,27 +70,19 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
7f4c2a
 
7f4c2a
 	local = frame->local;
7f4c2a
 	inode = local->inode;
7f4c2a
-        priv  = frame->this->private;
7f4c2a
 
7f4c2a
 	if (err)
7f4c2a
                 AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, -err, -1, readfn);
7f4c2a
 
7f4c2a
-	ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
7f4c2a
-					      &event_generation,
7f4c2a
-					      local->transaction.type);
7f4c2a
+	ret = afr_inode_get_readable (frame, inode, this, local->readable,
7f4c2a
+			              &event_generation,
7f4c2a
+				      local->transaction.type);
7f4c2a
 
7f4c2a
 	if (ret == -1 || !event_generation)
7f4c2a
 		/* Even after refresh, we don't have a good
7f4c2a
 		   read subvolume. Time to bail */
7f4c2a
                 AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
7f4c2a
 
7f4c2a
-         /* For directories in split-brain, we need to allow all fops
7f4c2a
-          * except (f)getxattr and access. */
7f4c2a
-        if (!AFR_COUNT(local->readable, priv->child_count) &&
7f4c2a
-            local->transaction.type == AFR_DATA_TRANSACTION &&
7f4c2a
-            inode->ia_type == IA_IFDIR)
7f4c2a
-                memcpy (local->readable, local->child_up, priv->child_count);
7f4c2a
-
7f4c2a
 	read_subvol = afr_read_subvol_select_by_policy (inode, this,
7f4c2a
 							local->readable, NULL);
7f4c2a
 	if (read_subvol == -1)
7f4c2a
@@ -237,8 +231,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
7f4c2a
 	if (read_subvol < 0 || read_subvol > priv->child_count) {
7f4c2a
 		gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
7f4c2a
                        "Unreadable subvolume %d found with event generation "
7f4c2a
-                       "%d. (Possible split-brain)",
7f4c2a
-                        read_subvol, event_generation);
7f4c2a
+                       "%d for gfid %s. (Possible split-brain)",
7f4c2a
+                        read_subvol, event_generation, uuid_utoa(inode->gfid));
7f4c2a
 		goto refresh;
7f4c2a
 	}
7f4c2a
 
7f4c2a
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
7f4c2a
index a202388..b27cfed 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr-transaction.c
7f4c2a
+++ b/xlators/cluster/afr/src/afr-transaction.c
7f4c2a
@@ -1967,6 +1967,13 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
7f4c2a
         if (ret < 0)
7f4c2a
             goto out;
7f4c2a
 
7f4c2a
+        ret = afr_inode_get_readable (frame, local->inode, this, 0, 0, type);
7f4c2a
+        if (ret) {
7f4c2a
+                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,
7f4c2a
+                        "Failing %s on gfid %s: split-brain observed.",
7f4c2a
+                        gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
7f4c2a
+                goto out;
7f4c2a
+        }
7f4c2a
         afr_transaction_eager_lock_init (local, this);
7f4c2a
 
7f4c2a
         if (local->fd && local->transaction.eager_lock_on)
7f4c2a
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
7f4c2a
index e6e7f3e..729ae82 100644
7f4c2a
--- a/xlators/cluster/afr/src/afr.h
7f4c2a
+++ b/xlators/cluster/afr/src/afr.h
7f4c2a
@@ -766,6 +766,9 @@ typedef struct afr_read_subvol_args {
7f4c2a
                                             (op_errno == EBADFD)))
7f4c2a
 
7f4c2a
 int
7f4c2a
+afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
7f4c2a
+                        unsigned char *readable, int *event_p, int type);
7f4c2a
+int
7f4c2a
 afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
7f4c2a
 			   unsigned char *data_subvols,
7f4c2a
 			   unsigned char *metadata_subvols,
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a