14f8ab
From 0d54bb417e982a100ceefb5eab2a61a17e840f39 Mon Sep 17 00:00:00 2001
14f8ab
From: Pranith Kumar K <pkarampu@redhat.com>
14f8ab
Date: Thu, 5 Sep 2019 16:12:39 +0530
14f8ab
Subject: [PATCH 289/297] cluster/ec: quorum-count implementation
14f8ab
14f8ab
Upstream-patch: https://review.gluster.org/c/glusterfs/+/23366
14f8ab
upstream-issue: #721
14f8ab
fixes: bz#1748688
14f8ab
Change-Id: I5333540e3c635ccf441cf1f4696e4c8986e38ea8
14f8ab
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/180674
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Ashish Pandey <aspandey@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 libglusterfs/src/glusterfs/globals.h             |   4 +-
14f8ab
 tests/basic/ec/ec-quorum-count-partial-failure.t |  50 +++++++
14f8ab
 tests/basic/ec/ec-quorum-count.t                 | 165 +++++++++++++++++++++++
14f8ab
 tests/ec.rc                                      |   9 ++
14f8ab
 xlators/cluster/ec/src/ec-common.c               |  13 ++
14f8ab
 xlators/cluster/ec/src/ec-common.h               |  24 ++++
14f8ab
 xlators/cluster/ec/src/ec-dir-write.c            |  57 ++++----
14f8ab
 xlators/cluster/ec/src/ec-inode-write.c          |  61 ++++-----
14f8ab
 xlators/cluster/ec/src/ec-types.h                |   1 +
14f8ab
 xlators/cluster/ec/src/ec.c                      |  13 ++
14f8ab
 xlators/mgmt/glusterd/src/glusterd-volume-set.c  |  46 +++++++
14f8ab
 11 files changed, 383 insertions(+), 60 deletions(-)
14f8ab
 create mode 100755 tests/basic/ec/ec-quorum-count-partial-failure.t
14f8ab
 create mode 100644 tests/basic/ec/ec-quorum-count.t
14f8ab
14f8ab
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
14f8ab
index 55476f6..bdc8b3d 100644
14f8ab
--- a/libglusterfs/src/glusterfs/globals.h
14f8ab
+++ b/libglusterfs/src/glusterfs/globals.h
14f8ab
@@ -50,7 +50,7 @@
14f8ab
     1 /* MIN is the fresh start op-version, mostly                             \
14f8ab
          should not change */
14f8ab
 #define GD_OP_VERSION_MAX                                                      \
14f8ab
-    GD_OP_VERSION_7_0 /* MAX VERSION is the maximum                            \
14f8ab
+    GD_OP_VERSION_8_0 /* MAX VERSION is the maximum                            \
14f8ab
                          count in VME table, should                            \
14f8ab
                          keep changing with                                    \
14f8ab
                          introduction of newer                                 \
14f8ab
@@ -136,6 +136,8 @@
14f8ab
 
14f8ab
 #define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
14f8ab
 
14f8ab
+#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
14f8ab
+
14f8ab
 #include "glusterfs/xlator.h"
14f8ab
 #include "glusterfs/options.h"
14f8ab
 
14f8ab
diff --git a/tests/basic/ec/ec-quorum-count-partial-failure.t b/tests/basic/ec/ec-quorum-count-partial-failure.t
14f8ab
new file mode 100755
14f8ab
index 0000000..79f5825
14f8ab
--- /dev/null
14f8ab
+++ b/tests/basic/ec/ec-quorum-count-partial-failure.t
14f8ab
@@ -0,0 +1,50 @@
14f8ab
+#!/bin/bash
14f8ab
+
14f8ab
+. $(dirname $0)/../../include.rc
14f8ab
+. $(dirname $0)/../../volume.rc
14f8ab
+
14f8ab
+#This test checks that a partial failure of a fop only results in the failure of the main fop
14f8ab
+cleanup;
14f8ab
+
14f8ab
+TEST glusterd
14f8ab
+TEST pidof glusterd
14f8ab
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
14f8ab
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
14f8ab
+TEST $CLI volume set $V0 performance.flush-behind off
14f8ab
+TEST $CLI volume start $V0
14f8ab
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
14f8ab
+
14f8ab
+TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1
14f8ab
+TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1
14f8ab
+TEST cp $M0/b $M0/c
14f8ab
+TEST fallocate -p -l 101 $M0/c
14f8ab
+TEST $CLI volume stop $V0
14f8ab
+TEST $CLI volume set $V0 debug.delay-gen posix;
14f8ab
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000;
14f8ab
+TEST $CLI volume set $V0 delay-gen.enable WRITE;
14f8ab
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
14f8ab
+TEST $CLI volume set $V0 disperse.quorum-count 6
14f8ab
+TEST $CLI volume start $V0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
14f8ab
+cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}')
14f8ab
+truncate -s 12345 $M0/a & #While the delayed fop is still waiting in delay-gen, introduce a failure
14f8ab
+fallocate -p -l 101 $M0/b &
14f8ab
+sleep 1
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}0
14f8ab
+TEST wait
14f8ab
+TEST $CLI volume start $V0 force
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
14f8ab
+EXPECT "12345" stat --format=%s $M0/a
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}1
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}2
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
14f8ab
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
14f8ab
+cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}')
14f8ab
+TEST [[ $cksum == $cksum_after_heal ]]
14f8ab
+cksum=$(dd if=$M0/c | md5sum | awk '{print $1}')
14f8ab
+cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}')
14f8ab
+TEST [[ $cksum == $cksum_after_heal ]]
14f8ab
+
14f8ab
+cleanup;
14f8ab
diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t
14f8ab
new file mode 100644
14f8ab
index 0000000..56b5329
14f8ab
--- /dev/null
14f8ab
+++ b/tests/basic/ec/ec-quorum-count.t
14f8ab
@@ -0,0 +1,165 @@
14f8ab
+#!/bin/bash
14f8ab
+
14f8ab
+. $(dirname $0)/../../include.rc
14f8ab
+. $(dirname $0)/../../volume.rc
14f8ab
+. $(dirname $0)/../../ec.rc
14f8ab
+
14f8ab
+cleanup
14f8ab
+TEST glusterd
14f8ab
+TEST pidof glusterd
14f8ab
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
14f8ab
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
14f8ab
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
14f8ab
+TEST $CLI volume set $V0 performance.flush-behind off
14f8ab
+
14f8ab
+#Should fail on non-disperse volume
14f8ab
+TEST ! $CLI volume set $V1 disperse.quorum-count 5
14f8ab
+
14f8ab
+#Should fail on values outside the valid range and succeed on a value within it
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count 0
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count -0
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count abc
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count 10abc
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count 1
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count 2
14f8ab
+TEST ! $CLI volume set $V0 disperse.quorum-count 3
14f8ab
+TEST $CLI volume set $V0 disperse.quorum-count 4
14f8ab
+TEST $CLI volume start $V0
14f8ab
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
14f8ab
+
14f8ab
+#Test that the option is reflected in the mount
14f8ab
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count
14f8ab
+TEST $CLI volume reset $V0 disperse.quorum-count
14f8ab
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
14f8ab
+TEST $CLI volume set $V0 disperse.quorum-count 6
14f8ab
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count
14f8ab
+
14f8ab
+TEST touch $M0/a
14f8ab
+TEST touch $M0/data
14f8ab
+TEST setfattr -n trusted.def -v def $M0/a
14f8ab
+TEST touch $M0/src
14f8ab
+TEST touch $M0/del-me
14f8ab
+TEST mkdir $M0/dir1
14f8ab
+TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct
14f8ab
+TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
14f8ab
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file
14f8ab
+#modify operations should fail as the file is not in quorum
14f8ab
+TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}0
14f8ab
+#Read should succeed even when quorum-count is not met
14f8ab
+TEST dd if=$M0/read-file of=/dev/null iflag=direct
14f8ab
+TEST ! touch $M0/a2
14f8ab
+TEST ! mkdir $M0/dir2
14f8ab
+TEST ! mknod  $M0/b2 b 4 5
14f8ab
+TEST ! ln -s $M0/a $M0/symlink
14f8ab
+TEST ! ln $M0/a $M0/link
14f8ab
+TEST ! mv $M0/src $M0/dst
14f8ab
+TEST ! rm -f $M0/del-me
14f8ab
+TEST ! rmdir $M0/dir1
14f8ab
+TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc
14f8ab
+TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc
14f8ab
+TEST ! truncate -s 0 $M0/a
14f8ab
+TEST ! setfattr -n trusted.abc -v abc $M0/a
14f8ab
+TEST ! setfattr -x trusted.def $M0/a
14f8ab
+TEST ! chmod +x $M0/a
14f8ab
+TEST ! fallocate -l 2m -n $M0/a
14f8ab
+TEST ! fallocate -p -l 512k $M0/a
14f8ab
+TEST $CLI volume start $V0 force
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
14f8ab
+
14f8ab
+# reset the option and check whether the default redundancy count is
14f8ab
+# accepted or not.
14f8ab
+TEST $CLI volume reset $V0 disperse.quorum-count
14f8ab
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
14f8ab
+TEST touch $M0/a1
14f8ab
+TEST touch $M0/data1
14f8ab
+TEST setfattr -n trusted.def -v def $M0/a1
14f8ab
+TEST touch $M0/src1
14f8ab
+TEST touch $M0/del-me1
14f8ab
+TEST mkdir $M0/dir11
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}0
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}1
14f8ab
+TEST touch $M0/a21
14f8ab
+TEST mkdir $M0/dir21
14f8ab
+TEST mknod  $M0/b21 b 4 5
14f8ab
+TEST ln -s $M0/a1 $M0/symlink1
14f8ab
+TEST ln $M0/a1 $M0/link1
14f8ab
+TEST mv $M0/src1 $M0/dst1
14f8ab
+TEST rm -f $M0/del-me1
14f8ab
+TEST rmdir $M0/dir11
14f8ab
+TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc
14f8ab
+TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc
14f8ab
+TEST truncate -s 0 $M0/a1
14f8ab
+TEST setfattr -n trusted.abc -v abc $M0/a1
14f8ab
+TEST setfattr -x trusted.def $M0/a1
14f8ab
+TEST chmod +x $M0/a1
14f8ab
+TEST fallocate -l 2m -n $M0/a1
14f8ab
+TEST fallocate -p -l 512k $M0/a1
14f8ab
+TEST $CLI volume start $V0 force
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
14f8ab
+
14f8ab
+TEST touch $M0/a2
14f8ab
+TEST touch $M0/data2
14f8ab
+TEST setfattr -n trusted.def -v def $M0/a1
14f8ab
+TEST touch $M0/src2
14f8ab
+TEST touch $M0/del-me2
14f8ab
+TEST mkdir $M0/dir12
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}0
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}1
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}2
14f8ab
+TEST ! touch $M0/a22
14f8ab
+TEST ! mkdir $M0/dir22
14f8ab
+TEST ! mknod  $M0/b22 b 4 5
14f8ab
+TEST ! ln -s $M0/a2 $M0/symlink2
14f8ab
+TEST ! ln $M0/a2 $M0/link2
14f8ab
+TEST ! mv $M0/src2 $M0/dst2
14f8ab
+TEST ! rm -f $M0/del-me2
14f8ab
+TEST ! rmdir $M0/dir12
14f8ab
+TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc
14f8ab
+TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc
14f8ab
+TEST ! truncate -s 0 $M0/a2
14f8ab
+TEST ! setfattr -n trusted.abc -v abc $M0/a2
14f8ab
+TEST ! setfattr -x trusted.def $M0/a2
14f8ab
+TEST ! chmod +x $M0/a2
14f8ab
+TEST ! fallocate -l 2m -n $M0/a2
14f8ab
+TEST ! fallocate -p -l 512k $M0/a2
14f8ab
+TEST $CLI volume start $V0 force
14f8ab
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
14f8ab
+
14f8ab
+# Set quorum-count to 5 and kill 1 brick and the fops should pass
14f8ab
+TEST $CLI volume set $V0 disperse.quorum-count 5
14f8ab
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count
14f8ab
+TEST touch $M0/a3
14f8ab
+TEST touch $M0/data3
14f8ab
+TEST setfattr -n trusted.def -v def $M0/a3
14f8ab
+TEST touch $M0/src3
14f8ab
+TEST touch $M0/del-me3
14f8ab
+TEST mkdir $M0/dir13
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}0
14f8ab
+TEST touch $M0/a31
14f8ab
+TEST mkdir $M0/dir31
14f8ab
+TEST mknod  $M0/b31 b 4 5
14f8ab
+TEST ln -s $M0/a3 $M0/symlink3
14f8ab
+TEST ln $M0/a3 $M0/link3
14f8ab
+TEST mv $M0/src3 $M0/dst3
14f8ab
+TEST rm -f $M0/del-me3
14f8ab
+TEST rmdir $M0/dir13
14f8ab
+TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc
14f8ab
+TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc
14f8ab
+TEST truncate -s 0 $M0/a3
14f8ab
+TEST setfattr -n trusted.abc -v abc $M0/a3
14f8ab
+TEST setfattr -x trusted.def $M0/a3
14f8ab
+TEST chmod +x $M0/a3
14f8ab
+TEST fallocate -l 2m -n $M0/a3
14f8ab
+TEST fallocate -p -l 512k $M0/a3
14f8ab
+TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct
14f8ab
+cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')"
14f8ab
+TEST $CLI volume start $V0 force
14f8ab
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}4
14f8ab
+TEST kill_brick $V0 $H0 $B0/${V0}5
14f8ab
+cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}')
14f8ab
+TEST [[ $cksum_before_heal == $cksum_after_heal ]]
14f8ab
+cleanup;
14f8ab
diff --git a/tests/ec.rc b/tests/ec.rc
14f8ab
index 04405ec..f18752f 100644
14f8ab
--- a/tests/ec.rc
14f8ab
+++ b/tests/ec.rc
14f8ab
@@ -7,3 +7,12 @@ function ec_up_status()
14f8ab
         local ec_id=$3
14f8ab
         grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='
14f8ab
 }
14f8ab
+
14f8ab
+function ec_option_value()
14f8ab
+{
14f8ab
+    local v=$1
14f8ab
+    local m=$2
14f8ab
+    local ec_id=$3
14f8ab
+    local opt=$4
14f8ab
+    grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}'
14f8ab
+}
14f8ab
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
14f8ab
index 92d4e5d..2e59180 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-common.c
14f8ab
+++ b/xlators/cluster/ec/src/ec-common.c
14f8ab
@@ -707,6 +707,19 @@ ec_child_select(ec_fop_data_t *fop)
14f8ab
         return 0;
14f8ab
     }
14f8ab
 
14f8ab
+    if (!fop->parent && fop->lock_count &&
14f8ab
+        (fop->locks[0].update[EC_DATA_TXN] ||
14f8ab
+         fop->locks[0].update[EC_METADATA_TXN])) {
14f8ab
+        if (ec->quorum_count && (num < ec->quorum_count)) {
14f8ab
+            gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_CHILDS_INSUFFICIENT,
14f8ab
+                   "Insufficient available children "
14f8ab
+                   "for this request (have %d, need "
14f8ab
+                   "%d). %s",
14f8ab
+                   num, ec->quorum_count, ec_msg_str(fop));
14f8ab
+            return 0;
14f8ab
+        }
14f8ab
+    }
14f8ab
+
14f8ab
     return 1;
14f8ab
 }
14f8ab
 
14f8ab
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
14f8ab
index 3c69471..eab86ee 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-common.h
14f8ab
+++ b/xlators/cluster/ec/src/ec-common.h
14f8ab
@@ -26,6 +26,30 @@ typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
14f8ab
 
14f8ab
 #define EC_FLAG_LOCK_SHARED 0x0001
14f8ab
 
14f8ab
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...)  \
14f8ab
+    do {                                                                       \
14f8ab
+        ec_t *__ec = fop->xl->private;                                         \
14f8ab
+        int32_t __op_ret = 0;                                                  \
14f8ab
+        int32_t __op_errno = 0;                                                \
14f8ab
+        int32_t __success_count = gf_bits_count(fop->good);                    \
14f8ab
+                                                                               \
14f8ab
+        __op_ret = op_ret;                                                     \
14f8ab
+        __op_errno = op_errno;                                                 \
14f8ab
+        if (!fop->parent && frame &&                                           \
14f8ab
+            (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) &&                  \
14f8ab
+            __ec->quorum_count && (__success_count < __ec->quorum_count) &&    \
14f8ab
+            op_ret >= 0) {                                                     \
14f8ab
+            __op_ret = -1;                                                     \
14f8ab
+            __op_errno = EIO;                                                  \
14f8ab
+            gf_msg(__ec->xl->name, GF_LOG_ERROR, 0,                            \
14f8ab
+                   EC_MSG_CHILDS_INSUFFICIENT,                                 \
14f8ab
+                   "Insufficient available children for this request "         \
14f8ab
+                   "(have %d, need %d). %s",                                   \
14f8ab
+                   __success_count, __ec->quorum_count, ec_msg_str(fop));      \
14f8ab
+        }                                                                      \
14f8ab
+        fn(frame, cookie, this, __op_ret, __op_errno, params);                 \
14f8ab
+    } while (0)
14f8ab
+
14f8ab
 enum _ec_xattrop_flags {
14f8ab
     EC_FLAG_XATTROP,
14f8ab
     EC_FLAG_DATA_DIRTY,
14f8ab
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
14f8ab
index 0b8ee21..8192462 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-dir-write.c
14f8ab
+++ b/xlators/cluster/ec/src/ec-dir-write.c
14f8ab
@@ -218,10 +218,10 @@ ec_manager_create(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.create != NULL) {
14f8ab
-                fop->cbks.create(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                 cbk->op_errno, fop->fd, fop->loc[0].inode,
14f8ab
-                                 &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
14f8ab
-                                 cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, fop->fd,
14f8ab
+                           fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
+                           &cbk->iatt[2], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -390,9 +390,10 @@ ec_manager_link(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.link != NULL) {
14f8ab
-                fop->cbks.link(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                               cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
14f8ab
-                               &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
14f8ab
+                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
14f8ab
+                           cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -569,9 +570,10 @@ ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.mkdir != NULL) {
14f8ab
-                fop->cbks.mkdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
14f8ab
-                                &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
14f8ab
+                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
14f8ab
+                           cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -773,9 +775,10 @@ ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.mknod != NULL) {
14f8ab
-                fop->cbks.mknod(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                cbk->op_errno, fop->loc[0].inode, &cbk->iatt[0],
14f8ab
-                                &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
14f8ab
+                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
14f8ab
+                           cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -931,10 +934,10 @@ ec_manager_rename(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.rename != NULL) {
14f8ab
-                fop->cbks.rename(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                 cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                 &cbk->iatt[2], &cbk->iatt[3], &cbk->iatt[4],
14f8ab
-                                 cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
14f8ab
+                           &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
14f8ab
+                           &cbk->iatt[4], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -1083,9 +1086,9 @@ ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.rmdir != NULL) {
14f8ab
-                fop->cbks.rmdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
14f8ab
+                           &cbk->iatt[1], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -1237,10 +1240,10 @@ ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.symlink != NULL) {
14f8ab
-                fop->cbks.symlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                  cbk->op_errno, fop->loc[0].inode,
14f8ab
-                                  &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
14f8ab
-                                  cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
14f8ab
+                           &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
14f8ab
+                           cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -1392,9 +1395,9 @@ ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.unlink != NULL) {
14f8ab
-                fop->cbks.unlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                 cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                 cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
14f8ab
+                           &cbk->iatt[1], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
14f8ab
index 8bfa3b4..2dbb4db 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-inode-write.c
14f8ab
+++ b/xlators/cluster/ec/src/ec-inode-write.c
14f8ab
@@ -185,26 +185,26 @@ ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
14f8ab
     switch (fop->id) {
14f8ab
         case GF_FOP_SETXATTR:
14f8ab
             if (fop->cbks.setxattr) {
14f8ab
-                fop->cbks.setxattr(frame, cookie, this, op_ret, op_errno,
14f8ab
-                                   xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
14f8ab
+                           op_errno, xdata);
14f8ab
             }
14f8ab
             break;
14f8ab
         case GF_FOP_REMOVEXATTR:
14f8ab
             if (fop->cbks.removexattr) {
14f8ab
-                fop->cbks.removexattr(frame, cookie, this, op_ret, op_errno,
14f8ab
-                                      xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
14f8ab
+                           op_ret, op_errno, xdata);
14f8ab
             }
14f8ab
             break;
14f8ab
         case GF_FOP_FSETXATTR:
14f8ab
             if (fop->cbks.fsetxattr) {
14f8ab
-                fop->cbks.fsetxattr(frame, cookie, this, op_ret, op_errno,
14f8ab
-                                    xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
14f8ab
+                           op_ret, op_errno, xdata);
14f8ab
             }
14f8ab
             break;
14f8ab
         case GF_FOP_FREMOVEXATTR:
14f8ab
             if (fop->cbks.fremovexattr) {
14f8ab
-                fop->cbks.fremovexattr(frame, cookie, this, op_ret, op_errno,
14f8ab
-                                       xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
14f8ab
+                           op_ret, op_errno, xdata);
14f8ab
             }
14f8ab
             break;
14f8ab
     }
14f8ab
@@ -494,16 +494,15 @@ ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
14f8ab
 
14f8ab
             if (fop->id == GF_FOP_SETATTR) {
14f8ab
                 if (fop->cbks.setattr != NULL) {
14f8ab
-                    fop->cbks.setattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                      cbk->op_errno, &cbk->iatt[0],
14f8ab
-                                      &cbk->iatt[1], cbk->xdata);
14f8ab
+                    QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
14f8ab
+                               fop->xl, cbk->op_ret, cbk->op_errno,
14f8ab
+                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
14f8ab
                 }
14f8ab
             } else {
14f8ab
                 if (fop->cbks.fsetattr != NULL) {
14f8ab
-                    fop->cbks.fsetattr(fop->req_frame, fop, fop->xl,
14f8ab
-                                       cbk->op_ret, cbk->op_errno,
14f8ab
-                                       &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                       cbk->xdata);
14f8ab
+                    QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
14f8ab
+                               fop->xl, cbk->op_ret, cbk->op_errno,
14f8ab
+                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
14f8ab
                 }
14f8ab
             }
14f8ab
 
14f8ab
@@ -994,9 +993,9 @@ ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.fallocate != NULL) {
14f8ab
-                fop->cbks.fallocate(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                    cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                    cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
14f8ab
+                           fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
14f8ab
+                           &cbk->iatt[1], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -1247,9 +1246,9 @@ ec_manager_discard(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.discard != NULL) {
14f8ab
-                fop->cbks.discard(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                  cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                  cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
14f8ab
+                           &cbk->iatt[1], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
@@ -1477,17 +1476,15 @@ ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
14f8ab
 
14f8ab
             if (fop->id == GF_FOP_TRUNCATE) {
14f8ab
                 if (fop->cbks.truncate != NULL) {
14f8ab
-                    fop->cbks.truncate(fop->req_frame, fop, fop->xl,
14f8ab
-                                       cbk->op_ret, cbk->op_errno,
14f8ab
-                                       &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                       cbk->xdata);
14f8ab
+                    QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
14f8ab
+                               fop->xl, cbk->op_ret, cbk->op_errno,
14f8ab
+                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
14f8ab
                 }
14f8ab
             } else {
14f8ab
                 if (fop->cbks.ftruncate != NULL) {
14f8ab
-                    fop->cbks.ftruncate(fop->req_frame, fop, fop->xl,
14f8ab
-                                        cbk->op_ret, cbk->op_errno,
14f8ab
-                                        &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                        cbk->xdata);
14f8ab
+                    QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
14f8ab
+                               fop->xl, cbk->op_ret, cbk->op_errno,
14f8ab
+                               &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
14f8ab
                 }
14f8ab
             }
14f8ab
 
14f8ab
@@ -2245,9 +2242,9 @@ ec_manager_writev(ec_fop_data_t *fop, int32_t state)
14f8ab
             GF_ASSERT(cbk != NULL);
14f8ab
 
14f8ab
             if (fop->cbks.writev != NULL) {
14f8ab
-                fop->cbks.writev(fop->req_frame, fop, fop->xl, cbk->op_ret,
14f8ab
-                                 cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
14f8ab
-                                 cbk->xdata);
14f8ab
+                QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
14f8ab
+                           cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
14f8ab
+                           &cbk->iatt[1], cbk->xdata);
14f8ab
             }
14f8ab
 
14f8ab
             return EC_STATE_LOCK_REUSE;
14f8ab
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
14f8ab
index f27f2ec..ea4f6ad 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-types.h
14f8ab
+++ b/xlators/cluster/ec/src/ec-types.h
14f8ab
@@ -654,6 +654,7 @@ struct _ec {
14f8ab
     gf_boolean_t optimistic_changelog;
14f8ab
     gf_boolean_t parallel_writes;
14f8ab
     uint32_t stripe_cache;
14f8ab
+    uint32_t quorum_count;
14f8ab
     uint32_t background_heals;
14f8ab
     uint32_t heal_wait_qlen;
14f8ab
     uint32_t self_heal_window_size; /* max size of read/writes */
14f8ab
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
14f8ab
index 3c8013e..19094c4 100644
14f8ab
--- a/xlators/cluster/ec/src/ec.c
14f8ab
+++ b/xlators/cluster/ec/src/ec.c
14f8ab
@@ -285,6 +285,7 @@ reconfigure(xlator_t *this, dict_t *options)
14f8ab
     GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
14f8ab
                      failed);
14f8ab
     GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
14f8ab
+    GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
14f8ab
     ret = 0;
14f8ab
     if (ec_assign_read_policy(ec, read_policy)) {
14f8ab
         ret = -1;
14f8ab
@@ -720,6 +721,7 @@ init(xlator_t *this)
14f8ab
                    failed);
14f8ab
     GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
14f8ab
     GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
14f8ab
+    GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
14f8ab
 
14f8ab
     this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
14f8ab
     if (!this->itable)
14f8ab
@@ -1402,6 +1404,7 @@ ec_dump_private(xlator_t *this)
14f8ab
     gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
14f8ab
     gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
14f8ab
     gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
14f8ab
+    gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
14f8ab
 
14f8ab
     snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
14f8ab
              this->type, this->name);
14f8ab
@@ -1672,6 +1675,16 @@ struct volume_options options[] = {
14f8ab
                     "lead to extra memory consumption, maximum "
14f8ab
                     "(cache size * stripe size) Bytes per open file."},
14f8ab
     {
14f8ab
+        .key = {"quorum-count"},
14f8ab
+        .type = GF_OPTION_TYPE_INT,
14f8ab
+        .default_value = "0",
14f8ab
+        .description = /* adjacent literals concatenate; keep the spaces */
14f8ab
+            "This option can be used to define how many successes on "
14f8ab
+            "the bricks constitute a success to the application. This"
14f8ab
+            " count should be in the range "
14f8ab
+            "[disperse-data-count, disperse-count] (inclusive)",
14f8ab
+    },
14f8ab
+    {
14f8ab
         .key = {NULL},
14f8ab
     },
14f8ab
 };
14f8ab
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
14f8ab
index 8ce338e..7ca47a6 100644
14f8ab
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
14f8ab
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
14f8ab
@@ -1128,6 +1128,42 @@ out:
14f8ab
 }
14f8ab
 
14f8ab
 static int
14f8ab
+validate_disperse_quorum_count(glusterd_volinfo_t *volinfo, dict_t *dict,
14f8ab
+                               char *key, char *value, char **op_errstr)
14f8ab
+{
14f8ab
+    int ret = -1;
14f8ab
+    int quorum_count = 0;
14f8ab
+    int data_count = 0;
14f8ab
+
14f8ab
+    ret = gf_string2int(value, &quorum_count);
14f8ab
+    if (ret) {
14f8ab
+        gf_asprintf(op_errstr,
14f8ab
+                    "%s is not an integer. %s expects a "
14f8ab
+                    "valid integer value.",
14f8ab
+                    value, key);
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+
14f8ab
+    if (volinfo->type != GF_CLUSTER_TYPE_DISPERSE) {
14f8ab
+        gf_asprintf(op_errstr, "Cannot set %s for a non-disperse volume.", key);
14f8ab
+        ret = -1;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+
14f8ab
+    data_count = volinfo->disperse_count - volinfo->redundancy_count;
14f8ab
+    if (quorum_count < data_count || quorum_count > volinfo->disperse_count) {
14f8ab
+        gf_asprintf(op_errstr, "%d for %s is out of range [%d - %d]",
14f8ab
+                    quorum_count, key, data_count, volinfo->disperse_count);
14f8ab
+        ret = -1;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+
14f8ab
+    ret = 0;
14f8ab
+out:
14f8ab
+    return ret;
14f8ab
+}
14f8ab
+
14f8ab
+static int
14f8ab
 validate_parallel_readdir(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
14f8ab
                           char *value, char **op_errstr)
14f8ab
 {
14f8ab
@@ -3663,6 +3699,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
14f8ab
      .type = NO_DOC,
14f8ab
      .op_version = GD_OP_VERSION_3_13_0,
14f8ab
      .flags = VOLOPT_FLAG_CLIENT_OPT},
14f8ab
+    {.key = "disperse.quorum-count",
14f8ab
+     .voltype = "cluster/disperse",
14f8ab
+     .type = NO_DOC,
14f8ab
+     .op_version = GD_OP_VERSION_8_0,
14f8ab
+     .validate_fn = validate_disperse_quorum_count, /* range-checks the value */
14f8ab
+     .description = "This option can be used to define how many successes on "
14f8ab
+                    "the bricks constitute a success to the application. This"
14f8ab
+                    " count should be in the range "
14f8ab
+                    "[disperse-data-count, disperse-count] (inclusive)",
14f8ab
+     .flags = VOLOPT_FLAG_CLIENT_OPT},
14f8ab
     {
14f8ab
         .key = "features.sdfs",
14f8ab
         .voltype = "features/sdfs",
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab