3604df
From 7e14fd1769c2e5e189efbeebed997ebcf7a020c1 Mon Sep 17 00:00:00 2001
3604df
From: Pranith Kumar K <pkarampu@redhat.com>
3604df
Date: Thu, 2 Mar 2017 07:14:14 +0530
3604df
Subject: [PATCH 302/302] cluster/ec: Introduce optimistic changelog in EC
3604df
3604df
	Backport of https://review.gluster.org/16821
3604df
3604df
Problem: Fix to https://bugzilla.redhat.com/show_bug.cgi?id=1316873 has made
3604df
changes to set dirty flag before every update fop, data or metadata, and unset
3604df
it after successful operation. That makes some of the fops very slow such as
3604df
entry operations or metadata operations.
3604df
3604df
Solution: File data operations are the only operations which take some time and
3604df
setting dirty flag before a fop and unsetting it after serves the purpose as
3604df
probability of failure of a fop is high when the time duration is more. For all
3604df
the other operations, set dirty flag at the end of the fop, if any brick is
3604df
down and needs heal.
3604df
3604df
Providing following option to choose between high performance or better heal
3604df
marking for metadata and entry fops.
3604df
3604df
Set/Unset dirty flag for every update fop at the start of the fop. If OFF, this
3604df
option impacts performance of entry operations or metadata operations as it
3604df
will set dirty flag at the start and unset it at the end of ALL update fops. If
3604df
ON and all the bricks are good, dirty flag will be set at the start only for
3604df
file fops. For metadata and entry fops dirty flag will not be set at the start,
3604df
if all the bricks are good. This does not impact performance for metadata
3604df
operations and entry operation but has a very small window to miss marking
3604df
entry as dirty in case it is required to be healed.
3604df
3604df
Thanks to Xavi and Ashish for the design
3604df
Picked the .t file from Ashish' patch https://review.gluster.org/16298
3604df
3604df
 >BUG: 1408809
3604df
 >Change-Id: I3ce860063f0e2901e50754dcfc3e4ed22daf819f
3604df
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
3604df
3604df
BUG: 1408655
3604df
Change-Id: Ia8f2e9c5f39d8306ab8e8dcda7cf75a92519e3d7
3604df
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/99318
3604df
---
3604df
 libglusterfs/src/globals.h                      |   4 +-
3604df
 tests/basic/ec/ec-optimistic-changelog.t        | 152 ++++++++++++++++++++++++
3604df
 xlators/cluster/ec/src/ec-common.c              |  49 +++++++-
3604df
 xlators/cluster/ec/src/ec-data.h                |   3 +-
3604df
 xlators/cluster/ec/src/ec-generic.c             |  14 ++-
3604df
 xlators/cluster/ec/src/ec.c                     |  21 +++-
3604df
 xlators/cluster/ec/src/ec.h                     |   1 +
3604df
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |   6 +
3604df
 8 files changed, 243 insertions(+), 7 deletions(-)
3604df
 create mode 100644 tests/basic/ec/ec-optimistic-changelog.t
3604df
3604df
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
3604df
index f6164c6..bbddb21 100644
3604df
--- a/libglusterfs/src/globals.h
3604df
+++ b/libglusterfs/src/globals.h
3604df
@@ -43,7 +43,7 @@
3604df
  */
3604df
 #define GD_OP_VERSION_MIN  1 /* MIN is the fresh start op-version, mostly
3604df
                                 should not change */
3604df
-#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_9_1 /* MAX VERSION is the maximum
3604df
+#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_10_1 /* MAX VERSION is the maximum
3604df
                                                   count in VME table, should
3604df
                                                   keep changing with
3604df
                                                   introduction of newer
3604df
@@ -85,6 +85,8 @@
3604df
 
3604df
 #define GD_OP_VERSION_3_9_1    30901 /* Op-version for GlusterFS 3.9.1 */
3604df
 
3604df
+#define GD_OP_VERSION_3_10_1   31001 /* Op-version for GlusterFS 3.10.1 */
3604df
+
3604df
 #include "xlator.h"
3604df
 
3604df
 /* THIS */
3604df
diff --git a/tests/basic/ec/ec-optimistic-changelog.t b/tests/basic/ec/ec-optimistic-changelog.t
3604df
new file mode 100644
3604df
index 0000000..1277da6
3604df
--- /dev/null
3604df
+++ b/tests/basic/ec/ec-optimistic-changelog.t
3604df
@@ -0,0 +1,152 @@
3604df
+#!/bin/bash
3604df
+
3604df
+. $(dirname $0)/../../include.rc
3604df
+. $(dirname $0)/../../volume.rc
3604df
+
3604df
+# Checks pending-heal marking with disperse.optimistic-change-log off and on
3604df
+
3604df
+cleanup
3604df
+TEST glusterd
3604df
+TEST pidof glusterd
3604df
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
3604df
+TEST $CLI volume heal $V0 disable
3604df
+
3604df
+TEST $CLI volume set $V0 performance.stat-prefetch off
3604df
+TEST $CLI volume set $V0 performance.write-behind off
3604df
+TEST $CLI volume set $V0 performance.quick-read off
3604df
+TEST $CLI volume set $V0 performance.read-ahead off
3604df
+TEST $CLI volume set $V0 performance.io-cache off
3604df
+TEST $CLI volume set $V0 disperse.background-heals 0
3604df
+TEST $CLI volume set $V0 disperse.optimistic-change-log off
3604df
+TEST $CLI volume set $V0 disperse.eager-lock off
3604df
+TEST $CLI volume start $V0
3604df
+
3604df
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
3604df
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
3604df
+
3604df
+TEST $CLI volume set $V0 disperse.background-heals 1
3604df
+TEST touch $M0/a
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2
3604df
+
3604df
+
3604df
+
3604df
+### optimistic-change-log = off ; All bricks good. Test file operation
3604df
+echo abc > $M0/a
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = off ; Kill one brick. Test file operation
3604df
+TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
+echo abc > $M0/a
3604df
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
3604df
+$CLI volume start $V0 force
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+# Accessing the file should trigger its heal now
3604df
+EXPECT "abc" cat $M0/a
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = off ; All bricks good. Test entry operation
3604df
+TEST touch $M0/b
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = off ; All bricks good. Test metadata operation
3604df
+TEST chmod 0777 $M0/b
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = off ; Kill one brick. Test entry operation
3604df
+
3604df
+TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
+TEST touch $M0/c
3604df
+EXPECT 4 get_pending_heal_count $V0 #two for each active brick
3604df
+$CLI volume start $V0 force
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+getfattr -d -m. -e hex $M0 > /dev/null 2>&1
3604df
+getfattr -d -m. -e hex $M0/c > /dev/null 2>&1
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = off ; Kill one brick. Test metadata operation
3604df
+TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
+TEST chmod 0777 $M0/c
3604df
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
3604df
+$CLI volume start $V0 force
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+getfattr -d -m. -e hex $M0/c > /dev/null 2>&1
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
3604df
+
3604df
+### optimistic-change-log = on ; All bricks good. Test file operation
3604df
+
3604df
+echo abc > $M0/aa
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = on ; Kill one brick. Test file operation
3604df
+
3604df
+TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
+echo abc > $M0/aa
3604df
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
3604df
+$CLI volume start $V0 force
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+# Accessing the file should trigger its heal now
3604df
+getfattr -d -m. -e hex $M0/aa > /dev/null 2>&1
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = on ; All bricks good. Test entry operation
3604df
+
3604df
+TEST touch $M0/bb
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = on ; All bricks good. Test metadata operation
3604df
+
3604df
+TEST chmod 0777 $M0/bb
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = on ; Kill one brick. Test entry operation
3604df
+
3604df
+TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
+TEST touch $M0/cc
3604df
+EXPECT 4 get_pending_heal_count $V0 #two for each active brick
3604df
+$CLI volume start $V0 force
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+getfattr -d -m. -e hex $M0 > /dev/null 2>&1
3604df
+getfattr -d -m. -e hex $M0/cc > /dev/null 2>&1
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+## optimistic-change-log = on ; Kill one brick. Test metadata operation
3604df
+
3604df
+TEST kill_brick $V0 $H0 $B0/${V0}2
3604df
+TEST chmod 0777 $M0/cc
3604df
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
3604df
+$CLI volume start $V0 force
3604df
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
3604df
+getfattr -d -m. -e hex $M0/cc > /dev/null 2>&1
3604df
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
3604df
+
3604df
+############################################################
3604df
+
3604df
+cleanup
3604df
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
3604df
index 3064af6..647e750 100644
3604df
--- a/xlators/cluster/ec/src/ec-common.c
3604df
+++ b/xlators/cluster/ec/src/ec-common.c
3604df
@@ -926,16 +926,19 @@ ec_config_check (ec_fop_data_t *fop, ec_config_t *config)
3604df
 }
3604df
 
3604df
 gf_boolean_t
3604df
-ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty)
3604df
+ec_set_dirty_flag (ec_lock_link_t *link, ec_inode_t *ctx,
3604df
+                   uint64_t *dirty)
3604df
 {
3604df
 
3604df
     gf_boolean_t set_dirty = _gf_false;
3604df
 
3604df
     if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) {
3604df
+            if (!link->optimistic_changelog)
3604df
                 dirty[EC_DATA_TXN] = 1;
3604df
     }
3604df
 
3604df
     if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) {
3604df
+            if (!link->optimistic_changelog)
3604df
                 dirty[EC_METADATA_TXN] = 1;
3604df
     }
3604df
 
3604df
@@ -956,6 +959,7 @@ ec_prepare_update_cbk (call_frame_t *frame, void *cookie,
3604df
     ec_lock_link_t *link = fop->data;
3604df
     ec_lock_t *lock = NULL;
3604df
     ec_inode_t *ctx;
3604df
+    gf_boolean_t release = _gf_false;
3604df
 
3604df
     lock = link->lock;
3604df
     parent = link->fop;
3604df
@@ -1049,6 +1053,26 @@ unlock:
3604df
     UNLOCK(&lock->loc.inode->lock);
3604df
 
3604df
     if (op_errno == 0) {
3604df
+        /* If the fop fails on any of the good bricks, it is important to mark
3604df
+         * it dirty and update versions right away if dirty was not set before.
3604df
+         */
3604df
+        if (lock->good_mask & ~(fop->good | fop->remaining)) {
3604df
+                release = _gf_true;
3604df
+        }
3604df
+
3604df
+        /* lock->release is a critical field that is checked and modified most
3604df
+         * of the time inside a locked region. This use here is safe because we
3604df
+         * are in a modifying fop and we currently don't allow two modifying
3604df
+         * fops to be processed concurrently, so no one else could be checking
3604df
+         * or modifying it.*/
3604df
+        if (link->update[0] && !link->dirty[0]) {
3604df
+                lock->release |= release;
3604df
+        }
3604df
+
3604df
+        if (link->update[1] && !link->dirty[1]) {
3604df
+                lock->release |= release;
3604df
+        }
3604df
+
3604df
         /* We don't allow the main fop to be executed on bricks that have not
3604df
          * succeeded the initial xattrop. */
3604df
         parent->mask &= fop->good;
3604df
@@ -1091,6 +1115,7 @@ void ec_get_size_version(ec_lock_link_t *link)
3604df
     ec_inode_t *ctx;
3604df
     ec_fop_data_t *fop;
3604df
     dict_t *dict = NULL;
3604df
+    ec_t   *ec = NULL;
3604df
     int32_t error = 0;
3604df
     gf_boolean_t getting_xattr;
3604df
     gf_boolean_t set_dirty = _gf_false;
3604df
@@ -1099,6 +1124,17 @@ void ec_get_size_version(ec_lock_link_t *link)
3604df
     lock = link->lock;
3604df
     ctx = lock->ctx;
3604df
     fop = link->fop;
3604df
+    ec  = fop->xl->private;
3604df
+
3604df
+    if (ec->optimistic_changelog &&
3604df
+        !(ec->node_mask & ~link->lock->good_mask) && !ec_is_data_fop (fop->id))
3604df
+            link->optimistic_changelog = _gf_true;
3604df
+
3604df
+    /* If ctx->have_info is false and lock->query is true, it means that we'll
3604df
+     * send the xattrop anyway, so we can use it to update dirty counts, even
3604df
+     * if it's not necessary to do it right now. */
3604df
+    if (!ctx->have_info && lock->query)
3604df
+            link->optimistic_changelog = _gf_false;
3604df
 
3604df
     set_dirty = ec_set_dirty_flag (link, ctx, dirty);
3604df
 
3604df
@@ -1709,6 +1745,13 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
3604df
         if (link->update[1]) {
3604df
             ctx->post_version[1]++;
3604df
         }
3604df
+        /* If the fop fails on any of the good bricks, it is important to mark
3604df
+         * it dirty and update versions right away. */
3604df
+        if (link->update[0] || link->update[1]) {
3604df
+                if (lock->good_mask & ~(fop->good | fop->remaining)) {
3604df
+                        lock->release = _gf_true;
3604df
+                }
3604df
+        }
3604df
     }
3604df
 
3604df
     ec_lock_update_good(lock, fop);
3604df
@@ -2024,9 +2067,13 @@ ec_update_info(ec_lock_link_t *link)
3604df
                     if (ctx->dirty[1] != 0) {
3604df
                         dirty[1] = -1;
3604df
                     }
3604df
+            } else {
3604df
+                    link->optimistic_changelog = _gf_false;
3604df
+                    ec_set_dirty_flag (link, ctx, dirty);
3604df
             }
3604df
             memset(ctx->dirty, 0, sizeof(ctx->dirty));
3604df
     }
3604df
+
3604df
     if ((version[0] != 0) || (version[1] != 0) ||
3604df
         (dirty[0] != 0) || (dirty[1] != 0)) {
3604df
         ec_update_size_version(link, version, size, dirty);
3604df
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
3604df
index c3ec5cb..ddb9fab 100644
3604df
--- a/xlators/cluster/ec/src/ec-data.h
3604df
+++ b/xlators/cluster/ec/src/ec-data.h
3604df
@@ -184,6 +184,8 @@ struct _ec_lock_link
3604df
     struct list_head  owner_list;
3604df
     struct list_head  wait_list;
3604df
     gf_boolean_t      update[2];
3604df
+    gf_boolean_t      dirty[2];
3604df
+    gf_boolean_t      optimistic_changelog;
3604df
     loc_t            *base;
3604df
     uint64_t          size;
3604df
 };
3604df
@@ -271,7 +273,6 @@ struct _ec_cbk_data
3604df
     int32_t          op_errno;
3604df
     int32_t          count;
3604df
     uintptr_t        mask;
3604df
-    uint64_t         dirty[2];
3604df
 
3604df
     dict_t *         xdata;
3604df
     dict_t *         dict;
3604df
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
3604df
index 37b3b78..878277f 100644
3604df
--- a/xlators/cluster/ec/src/ec-generic.c
3604df
+++ b/xlators/cluster/ec/src/ec-generic.c
3604df
@@ -696,6 +696,7 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
3604df
     ec_fop_data_t * fop = NULL;
3604df
     ec_cbk_data_t * cbk = NULL;
3604df
     int32_t idx = (int32_t)(uintptr_t)cookie;
3604df
+    uint64_t       dirty[2] = {0};
3604df
 
3604df
     VALIDATE_OR_GOTO(this, out);
3604df
     GF_VALIDATE_OR_GOTO(this->name, frame, out);
3604df
@@ -745,8 +746,7 @@ int32_t ec_lookup_cbk(call_frame_t * frame, void * cookie, xlator_t * this,
3604df
 
3604df
                 goto out;
3604df
             }
3604df
-            ec_dict_del_array (xdata, EC_XATTR_DIRTY, cbk->dirty,
3604df
-                               EC_VERSION_SIZE);
3604df
+            ec_dict_del_array (xdata, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
3604df
         }
3604df
 
3604df
         ec_combine(cbk, ec_combine_lookup);
3604df
@@ -1141,7 +1141,9 @@ ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                 dict_t *xdata)
3604df
 {
3604df
         ec_fop_data_t *fop = NULL;
3604df
+        ec_lock_link_t *link = NULL;
3604df
         ec_cbk_data_t *cbk = NULL;
3604df
+        uint64_t       dirty[2] = {0};
3604df
         data_t *data;
3604df
         uint64_t *version;
3604df
         int32_t idx = (int32_t)(uintptr_t)cookie;
3604df
@@ -1177,8 +1179,14 @@ ec_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                     }
3604df
                 }
3604df
 
3604df
-                ec_dict_del_array (xattr, EC_XATTR_DIRTY, cbk->dirty,
3604df
+                ec_dict_del_array (xattr, EC_XATTR_DIRTY, dirty,
3604df
                                    EC_VERSION_SIZE);
3604df
+                link = fop->data;
3604df
+                if (link) {
3604df
+                        /*Keep a note of if the dirty is already set or not*/
3604df
+                        link->dirty[0] |= (dirty[0] != 0);
3604df
+                        link->dirty[1] |= (dirty[1] != 0);
3604df
+                }
3604df
         }
3604df
 
3604df
         if (xdata)
3604df
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
3604df
index bcdb9fa..7eeff30 100644
3604df
--- a/xlators/cluster/ec/src/ec.c
3604df
+++ b/xlators/cluster/ec/src/ec.c
3604df
@@ -281,6 +281,8 @@ reconfigure (xlator_t *this, dict_t *options)
3604df
         GF_OPTION_RECONF ("shd-wait-qlength", ec->shd.wait_qlength,
3604df
                           options, uint32, failed);
3604df
 
3604df
+        GF_OPTION_RECONF ("optimistic-change-log", ec->optimistic_changelog,
3604df
+                          options, bool, failed);
3604df
         return 0;
3604df
 failed:
3604df
         return -1;
3604df
@@ -639,6 +641,7 @@ init (xlator_t *this)
3604df
 
3604df
     GF_OPTION_INIT ("shd-max-threads", ec->shd.max_threads, uint32, failed);
3604df
     GF_OPTION_INIT ("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
3604df
+    GF_OPTION_INIT ("optimistic-change-log", ec->optimistic_changelog, bool, failed);
3604df
 
3604df
     this->itable = inode_table_new (EC_SHD_INODE_LRU_LIMIT, this);
3604df
     if (!this->itable)
3604df
@@ -1415,5 +1418,21 @@ struct volume_options options[] =
3604df
       .description = "This option can be used to control number of heals"
3604df
                      " that can wait in SHD per subvolume"
3604df
     },
3604df
-    { }
3604df
+    {   .key = {"optimistic-change-log"},
3604df
+        .type = GF_OPTION_TYPE_BOOL,
3604df
+        .default_value = "on", /* optimistic marking by default */
3604df
+        .description =  "Set/Unset dirty flag for every update fop at the start "
3604df
+                        "of the fop. If OFF, this option impacts performance of "
3604df
+                        "entry operations or metadata operations as it will "
3604df
+                        "set dirty flag at the start and unset it at the end of "
3604df
+                        "ALL update fop. If ON and all the bricks are good, "
3604df
+                        "dirty flag will be set at the start only for file fops. "
3604df
+                        "For metadata and entry fops dirty flag will not be set "
3604df
+                        "at the start, if all the bricks are good. This does "
3604df
+                        "not impact performance for metadata operations and "
3604df
+                        "entry operation but has a very small window to miss "
3604df
+                        "marking entry as dirty in case it is required to be "
3604df
+                        "healed"
3604df
+    },
3604df
+    { .key = {NULL} }
3604df
 };
3604df
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
3604df
index 49af5c2..bded652 100644
3604df
--- a/xlators/cluster/ec/src/ec.h
3604df
+++ b/xlators/cluster/ec/src/ec.h
3604df
@@ -55,6 +55,7 @@ struct _ec
3604df
     gf_timer_t *      timer;
3604df
     gf_boolean_t      shutdown;
3604df
     gf_boolean_t      eager_lock;
3604df
+    gf_boolean_t      optimistic_changelog;
3604df
     uint32_t          background_heals;
3604df
     uint32_t          heal_wait_qlen;
3604df
     struct list_head  pending_fops;
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
3604df
index 873ff99..36874f5 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
3604df
@@ -3038,6 +3038,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
3604df
           .flags       = OPT_FLAG_CLIENT_OPT,
3604df
           .op_version  = GD_OP_VERSION_3_9_1,
3604df
         },
3604df
+        { .key        = "disperse.optimistic-change-log",
3604df
+          .voltype    = "cluster/disperse",
3604df
+          .type       = NO_DOC,
3604df
+          .op_version = GD_OP_VERSION_3_10_1,
3604df
+          .flags      = OPT_FLAG_CLIENT_OPT
3604df
+        },
3604df
         { .key         = NULL
3604df
         }
3604df
 };
3604df
-- 
3604df
2.9.3
3604df