190130
From dc03340654d921916ac3890d713fc84ef4bb1e28 Mon Sep 17 00:00:00 2001
190130
From: Mohit Agrawal <moagrawal@redhat.com>
190130
Date: Sat, 29 Sep 2018 13:15:35 +0530
190130
Subject: [PATCH 445/449] feature/changelog: Avoid thread creation if xlator is
190130
 not enabled
190130
190130
Problem:
190130
Changelog creates threads even if the changelog is not enabled
190130
190130
Background:
190130
Changelog xlator broadly does two things
190130
  1. Journalling - Cosumers are geo-rep and glusterfind
190130
  2. Event Notification for registered events like (open, release etc) -
190130
     Consumers are bitrot, geo-rep
190130
190130
The existing option "changelog.changelog" controls journalling and
190130
there is no option to control event notification and is enabled by
190130
default. So when bitrot/geo-rep is not enabled on the volume, threads
190130
and resources(rpc and rbuf) related to event notifications consumes
190130
resources and cpu cycle which is unnecessary.
190130
190130
Solution:
190130
The solution is to have two different options as below.
190130
 1. changelog-notification : Event notifications
190130
 2. changelog : Journalling
190130
190130
This patch introduces the option "changelog-notification" which is
190130
not exposed to user. When either bitrot or changelog (journalling)
190130
is enabled, it internally enbales 'changelog-notification'. But
190130
once the 'changelog-notification' is enabled, it will not be disabled
190130
for the life time of the brick process even after bitrot and changelog
190130
is disabled. As of now, rpc resource cleanup has lot of races and is
190130
difficult to cleanup cleanly. If allowed, it leads to memory leaks
190130
and crashes on enable/disable of bitrot or changelog (journal) in a
190130
loop. Hence to be safer, the event notification is not disabled within
190130
lifetime of process once enabled.
190130
190130
> Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a
190130
> Updates: #475
190130
> Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
190130
> (Cherry pick from commit 6de80bcd6366778ac34ce58ec496fa08cc02bd0b)
190130
> (Reviewed on upstream link https://review.gluster.org/#/c/glusterfs/+/21896/)
190130
190130
BUG: 1790336
190130
Change-Id: Ifd00286e0966049e8eb9f21567fe407cf11bb02a
190130
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
190130
Reviewed-on: https://code.engineering.redhat.com/gerrit/202778
190130
Tested-by: Mohit Agrawal <moagrawa@redhat.com>
190130
Tested-by: RHGS Build Bot <nigelb@redhat.com>
190130
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
190130
---
190130
 rpc/rpc-lib/src/rpcsvc.c                           |  26 ++--
190130
 tests/basic/changelog/changelog-history.t          |  12 +-
190130
 tests/bugs/bitrot/bug-1227996.t                    |   1 -
190130
 tests/bugs/bitrot/bug-1245981.t                    |   4 +-
190130
 xlators/features/changelog/src/changelog-helpers.h |   4 +
190130
 .../features/changelog/src/changelog-rpc-common.c  |   3 +
190130
 xlators/features/changelog/src/changelog.c         | 149 +++++++++++++++------
190130
 xlators/mgmt/glusterd/src/glusterd-volgen.c        |  13 ++
190130
 8 files changed, 154 insertions(+), 58 deletions(-)
190130
190130
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
190130
index b058932..3f184bf 100644
190130
--- a/rpc/rpc-lib/src/rpcsvc.c
190130
+++ b/rpc/rpc-lib/src/rpcsvc.c
190130
@@ -1865,6 +1865,18 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
190130
         goto out;
190130
     }
190130
 
190130
+    pthread_rwlock_rdlock(&svc->rpclock);
190130
+    {
190130
+        list_for_each_entry(prog, &svc->programs, program)
190130
+        {
190130
+            if ((prog->prognum == program->prognum) &&
190130
+                (prog->progver == program->progver)) {
190130
+                break;
190130
+            }
190130
+        }
190130
+    }
190130
+    pthread_rwlock_unlock(&svc->rpclock);
190130
+
190130
     ret = rpcsvc_program_unregister_portmap(program);
190130
     if (ret == -1) {
190130
         gf_log(GF_RPCSVC, GF_LOG_ERROR,
190130
@@ -1881,17 +1893,6 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
190130
         goto out;
190130
     }
190130
 #endif
190130
-    pthread_rwlock_rdlock(&svc->rpclock);
190130
-    {
190130
-        list_for_each_entry(prog, &svc->programs, program)
190130
-        {
190130
-            if ((prog->prognum == program->prognum) &&
190130
-                (prog->progver == program->progver)) {
190130
-                break;
190130
-            }
190130
-        }
190130
-    }
190130
-    pthread_rwlock_unlock(&svc->rpclock);
190130
 
190130
     gf_log(GF_RPCSVC, GF_LOG_DEBUG,
190130
            "Program unregistered: %s, Num: %d,"
190130
@@ -1912,6 +1913,9 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
190130
 
190130
     ret = 0;
190130
 out:
190130
+    if (prog)
190130
+        GF_FREE(prog);
190130
+
190130
     if (ret == -1) {
190130
         if (program) {
190130
             gf_log(GF_RPCSVC, GF_LOG_ERROR,
190130
diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t
190130
index 3ce4098..b56e247 100644
190130
--- a/tests/basic/changelog/changelog-history.t
190130
+++ b/tests/basic/changelog/changelog-history.t
190130
@@ -5,6 +5,7 @@
190130
 
190130
 cleanup;
190130
 
190130
+SCRIPT_TIMEOUT=300
190130
 HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
190130
 build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog
190130
 
190130
@@ -68,18 +69,21 @@ TEST $CLI volume set $V0 changelog.changelog off
190130
 sleep 3
190130
 time_after_disable=$(date '+%s')
190130
 
190130
+TEST $CLI volume set $V0 changelog.changelog on
190130
+sleep 5
190130
+
190130
 #Passes, gives the changelogs till continuous changelogs are available
190130
 # but returns 1
190130
-EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
190130
+EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
190130
 
190130
 #Fails as start falls between htime files
190130
-EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
190130
+EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
190130
 
190130
 #Passes as start and end falls in same htime file
190130
-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
190130
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
190130
 
190130
 #Passes, gives the changelogs till continuous changelogs are available
190130
-EXPECT "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
190130
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
190130
 
190130
 TEST rm $HISTORY_BIN_PATH/get-history
190130
 
190130
diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t
190130
index 47ebc42..121c7b5 100644
190130
--- a/tests/bugs/bitrot/bug-1227996.t
190130
+++ b/tests/bugs/bitrot/bug-1227996.t
190130
@@ -17,7 +17,6 @@ TEST pidof glusterd;
190130
 ## Lets create and start the volume
190130
 TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
190130
 TEST $CLI volume start $V0
190130
-
190130
 ## Enable bitrot on volume $V0
190130
 TEST $CLI volume bitrot $V0 enable
190130
 
190130
diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t
190130
index 2bed4d9..f395525 100644
190130
--- a/tests/bugs/bitrot/bug-1245981.t
190130
+++ b/tests/bugs/bitrot/bug-1245981.t
190130
@@ -47,9 +47,9 @@ touch $M0/5
190130
 sleep `expr $SLEEP_TIME \* 2`
190130
 
190130
 backpath=$(get_backend_paths $fname)
190130
-TEST getfattr -m . -n trusted.bit-rot.signature $backpath
190130
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
190130
 
190130
 backpath=$(get_backend_paths $M0/new_file)
190130
-TEST getfattr -m . -n trusted.bit-rot.signature $backpath
190130
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
190130
 
190130
 cleanup;
190130
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
190130
index 517c4dc..3afacc9 100644
190130
--- a/xlators/features/changelog/src/changelog-helpers.h
190130
+++ b/xlators/features/changelog/src/changelog-helpers.h
190130
@@ -190,8 +190,12 @@ typedef struct changelog_ev_selector {
190130
 
190130
 /* changelog's private structure */
190130
 struct changelog_priv {
190130
+    /* changelog journalling */
190130
     gf_boolean_t active;
190130
 
190130
+    /* changelog live notifications */
190130
+    gf_boolean_t rpc_active;
190130
+
190130
     /* to generate unique socket file per brick */
190130
     char *changelog_brick;
190130
 
190130
diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
190130
index dcdcfb1..f2d1853 100644
190130
--- a/xlators/features/changelog/src/changelog-rpc-common.c
190130
+++ b/xlators/features/changelog/src/changelog-rpc-common.c
190130
@@ -263,6 +263,9 @@ changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
190130
     struct rpcsvc_program *prog = NULL;
190130
     rpc_transport_t *trans = NULL;
190130
 
190130
+    if (!rpc)
190130
+        return;
190130
+
190130
     while (*progs) {
190130
         prog = *progs;
190130
         (void)rpcsvc_program_unregister(rpc, prog);
190130
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
190130
index d9025f3..ff06c09 100644
190130
--- a/xlators/features/changelog/src/changelog.c
190130
+++ b/xlators/features/changelog/src/changelog.c
190130
@@ -34,6 +34,12 @@ static struct changelog_bootstrap cb_bootstrap[] = {
190130
     },
190130
 };
190130
 
190130
+static int
190130
+changelog_init_rpc(xlator_t *this, changelog_priv_t *priv);
190130
+
190130
+static int
190130
+changelog_init(xlator_t *this, changelog_priv_t *priv);
190130
+
190130
 /* Entry operations - TYPE III */
190130
 
190130
 /**
190130
@@ -2008,6 +2014,11 @@ notify(xlator_t *this, int event, void *data, ...)
190130
     uint64_t clntcnt = 0;
190130
     changelog_clnt_t *conn = NULL;
190130
     gf_boolean_t cleanup_notify = _gf_false;
190130
+    char sockfile[UNIX_PATH_MAX] = {
190130
+        0,
190130
+    };
190130
+    rpcsvc_listener_t *listener = NULL;
190130
+    rpcsvc_listener_t *next = NULL;
190130
 
190130
     INIT_LIST_HEAD(&queue);
190130
 
190130
@@ -2021,23 +2032,40 @@ notify(xlator_t *this, int event, void *data, ...)
190130
                "cleanup changelog rpc connection of brick %s",
190130
                priv->victim->name);
190130
 
190130
-        this->cleanup_starting = 1;
190130
-        changelog_destroy_rpc_listner(this, priv);
190130
-        conn = &priv->connections;
190130
-        if (conn)
190130
-            changelog_ev_cleanup_connections(this, conn);
190130
-        xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
190130
-        clntcnt = GF_ATOMIC_GET(priv->clntcnt);
190130
-
190130
-        if (!xprtcnt && !clntcnt) {
190130
-            LOCK(&priv->lock);
190130
-            {
190130
-                cleanup_notify = priv->notify_down;
190130
-                priv->notify_down = _gf_true;
190130
+        if (priv->rpc_active) {
190130
+            this->cleanup_starting = 1;
190130
+            changelog_destroy_rpc_listner(this, priv);
190130
+            conn = &priv->connections;
190130
+            if (conn)
190130
+                changelog_ev_cleanup_connections(this, conn);
190130
+            xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
190130
+            clntcnt = GF_ATOMIC_GET(priv->clntcnt);
190130
+            if (!xprtcnt && !clntcnt) {
190130
+                LOCK(&priv->lock);
190130
+                {
190130
+                    cleanup_notify = priv->notify_down;
190130
+                    priv->notify_down = _gf_true;
190130
+                }
190130
+                UNLOCK(&priv->lock);
190130
+                list_for_each_entry_safe(listener, next, &priv->rpc->listeners,
190130
+                                         list)
190130
+                {
190130
+                    if (listener->trans) {
190130
+                        rpc_transport_unref(listener->trans);
190130
+                    }
190130
+                }
190130
+                CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
190130
+                                           UNIX_PATH_MAX);
190130
+                sys_unlink(sockfile);
190130
+                if (priv->rpc) {
190130
+                    rpcsvc_destroy(priv->rpc);
190130
+                    priv->rpc = NULL;
190130
+                }
190130
+                if (!cleanup_notify)
190130
+                    default_notify(this, GF_EVENT_PARENT_DOWN, data);
190130
             }
190130
-            UNLOCK(&priv->lock);
190130
-            if (!cleanup_notify)
190130
-                default_notify(this, GF_EVENT_PARENT_DOWN, data);
190130
+        } else {
190130
+            default_notify(this, GF_EVENT_PARENT_DOWN, data);
190130
         }
190130
         goto out;
190130
     }
190130
@@ -2425,6 +2453,22 @@ changelog_barrier_pthread_destroy(changelog_priv_t *priv)
190130
     LOCK_DESTROY(&priv->bflags.lock);
190130
 }
190130
 
190130
+static void
190130
+changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
190130
+{
190130
+    /* terminate rpc server */
190130
+    if (!this->cleanup_starting)
190130
+        changelog_destroy_rpc_listner(this, priv);
190130
+
190130
+    (void)changelog_cleanup_rpc_threads(this, priv);
190130
+    /* cleanup rot buffs */
190130
+    rbuf_dtor(priv->rbuf);
190130
+
190130
+    /* cleanup poller thread */
190130
+    if (priv->poller)
190130
+        (void)changelog_thread_cleanup(this, priv->poller);
190130
+}
190130
+
190130
 int
190130
 reconfigure(xlator_t *this, dict_t *options)
190130
 {
190130
@@ -2433,6 +2477,9 @@ reconfigure(xlator_t *this, dict_t *options)
190130
     changelog_priv_t *priv = NULL;
190130
     gf_boolean_t active_earlier = _gf_true;
190130
     gf_boolean_t active_now = _gf_true;
190130
+    gf_boolean_t rpc_active_earlier = _gf_true;
190130
+    gf_boolean_t rpc_active_now = _gf_true;
190130
+    gf_boolean_t iniate_rpc = _gf_false;
190130
     changelog_time_slice_t *slice = NULL;
190130
     changelog_log_data_t cld = {
190130
         0,
190130
@@ -2454,6 +2501,7 @@ reconfigure(xlator_t *this, dict_t *options)
190130
 
190130
     ret = -1;
190130
     active_earlier = priv->active;
190130
+    rpc_active_earlier = priv->rpc_active;
190130
 
190130
     /* first stop the rollover and the fsync thread */
190130
     changelog_cleanup_helper_threads(this, priv);
190130
@@ -2487,6 +2535,29 @@ reconfigure(xlator_t *this, dict_t *options)
190130
         goto out;
190130
 
190130
     GF_OPTION_RECONF("changelog", active_now, options, bool, out);
190130
+    GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool,
190130
+                     out);
190130
+
190130
+    /* If journalling is enabled, enable rpc notifications */
190130
+    if (active_now && !active_earlier) {
190130
+        if (!rpc_active_earlier)
190130
+            iniate_rpc = _gf_true;
190130
+    }
190130
+
190130
+    if (rpc_active_now && !rpc_active_earlier) {
190130
+        iniate_rpc = _gf_true;
190130
+    }
190130
+
190130
+    /* TODO: Disable of changelog-notifications is not supported for now
190130
+     * as there is no clean way of cleaning up of rpc resources
190130
+     */
190130
+
190130
+    if (iniate_rpc) {
190130
+        ret = changelog_init_rpc(this, priv);
190130
+        if (ret)
190130
+            goto out;
190130
+        priv->rpc_active = _gf_true;
190130
+    }
190130
 
190130
     /**
190130
      * changelog_handle_change() handles changes that could possibly
190130
@@ -2618,6 +2689,7 @@ changelog_init_options(xlator_t *this, changelog_priv_t *priv)
190130
         goto dealloc_2;
190130
 
190130
     GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2);
190130
+    GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2);
190130
     GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2);
190130
 
190130
     GF_OPTION_INIT("op-mode", tmp, str, dealloc_2);
190130
@@ -2656,22 +2728,6 @@ error_return:
190130
     return -1;
190130
 }
190130
 
190130
-static void
190130
-changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
190130
-{
190130
-    /* terminate rpc server */
190130
-    if (!this->cleanup_starting)
190130
-        changelog_destroy_rpc_listner(this, priv);
190130
-
190130
-    (void)changelog_cleanup_rpc_threads(this, priv);
190130
-    /* cleanup rot buffs */
190130
-    rbuf_dtor(priv->rbuf);
190130
-
190130
-    /* cleanup poller thread */
190130
-    if (priv->poller)
190130
-        (void)changelog_thread_cleanup(this, priv->poller);
190130
-}
190130
-
190130
 static int
190130
 changelog_init_rpc(xlator_t *this, changelog_priv_t *priv)
190130
 {
190130
@@ -2768,10 +2824,13 @@ init(xlator_t *this)
190130
     INIT_LIST_HEAD(&priv->queue);
190130
     priv->barrier_enabled = _gf_false;
190130
 
190130
-    /* RPC ball rolling.. */
190130
-    ret = changelog_init_rpc(this, priv);
190130
-    if (ret)
190130
-        goto cleanup_barrier;
190130
+    if (priv->rpc_active || priv->active) {
190130
+        /* RPC ball rolling.. */
190130
+        ret = changelog_init_rpc(this, priv);
190130
+        if (ret)
190130
+            goto cleanup_barrier;
190130
+        priv->rpc_active = _gf_true;
190130
+    }
190130
 
190130
     ret = changelog_init(this, priv);
190130
     if (ret)
190130
@@ -2783,7 +2842,9 @@ init(xlator_t *this)
190130
     return 0;
190130
 
190130
 cleanup_rpc:
190130
-    changelog_cleanup_rpc(this, priv);
190130
+    if (priv->rpc_active) {
190130
+        changelog_cleanup_rpc(this, priv);
190130
+    }
190130
 cleanup_barrier:
190130
     changelog_barrier_pthread_destroy(priv);
190130
 cleanup_options:
190130
@@ -2808,9 +2869,10 @@ fini(xlator_t *this)
190130
     priv = this->private;
190130
 
190130
     if (priv) {
190130
-        /* terminate RPC server/threads */
190130
-        changelog_cleanup_rpc(this, priv);
190130
-
190130
+        if (priv->active || priv->rpc_active) {
190130
+            /* terminate RPC server/threads */
190130
+            changelog_cleanup_rpc(this, priv);
190130
+        }
190130
         /* call barrier_disable to cancel timer */
190130
         if (priv->barrier_enabled)
190130
             __chlog_barrier_disable(this, &queue);
190130
@@ -2879,6 +2941,13 @@ struct volume_options options[] = {
190130
      .flags = OPT_FLAG_SETTABLE,
190130
      .level = OPT_STATUS_BASIC,
190130
      .tags = {"journal", "georep", "glusterfind"}},
190130
+    {.key = {"changelog-notification"},
190130
+     .type = GF_OPTION_TYPE_BOOL,
190130
+     .default_value = "off",
190130
+     .description = "enable/disable changelog live notification",
190130
+     .op_version = {3},
190130
+     .level = OPT_STATUS_BASIC,
190130
+     .tags = {"bitrot", "georep"}},
190130
     {.key = {"changelog-brick"},
190130
      .type = GF_OPTION_TYPE_PATH,
190130
      .description = "brick path to generate unique socket file name."
190130
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
190130
index 16346e7..13f84ea 100644
190130
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
190130
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
190130
@@ -1876,6 +1876,19 @@ brick_graph_add_changelog(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
190130
     ret = xlator_set_fixed_option(xl, "changelog-dir", changelog_basepath);
190130
     if (ret)
190130
         goto out;
190130
+
190130
+    ret = glusterd_is_bitrot_enabled(volinfo);
190130
+    if (ret == -1) {
190130
+        goto out;
190130
+    } else if (ret) {
190130
+        ret = xlator_set_fixed_option(xl, "changelog-notification", "on");
190130
+        if (ret)
190130
+            goto out;
190130
+    } else {
190130
+        ret = xlator_set_fixed_option(xl, "changelog-notification", "off");
190130
+        if (ret)
190130
+            goto out;
190130
+    }
190130
 out:
190130
     return ret;
190130
 }
190130
-- 
190130
1.8.3.1
190130