21ab4e
From 2b898184f17dd95ee2f7c17fb4f8ec3aad598135 Mon Sep 17 00:00:00 2001
21ab4e
From: Poornima G <pgurusid@redhat.com>
21ab4e
Date: Fri, 26 May 2017 15:45:57 +0530
21ab4e
Subject: [PATCH 505/509] nl-cache: Fix a possible crash and stale cache
21ab4e
21ab4e
Issue1:
21ab4e
Consider the following sequence of operations:
21ab4e
   ...
21ab4e
   nlc_ctx = nlc_ctx_get (inode i1)
21ab4e
   ....... -> nlc_clear_cache (i1) gets called as a part of nlc_invalidate
21ab4e
                                   or any other callers
21ab4e
              ...
21ab4e
              GF_FREE (i1's nlc_ctx)
21ab4e
   LOCK (nlc_ctx->lock);  ->  This will result in a crash as the ctx
21ab4e
                              got freed in nlc_clear_cache.
21ab4e
21ab4e
Issue2:
21ab4e
   lookup on dir1/file1 results in ENOENT
21ab4e
   add cache to dir1 at time T1
21ab4e
   ....
21ab4e
   CHILD_DOWN at T2
21ab4e
   lookup on dir1/file2 results in ENOENT
21ab4e
   add cache to dir1, but the cache time is still T1
21ab4e
   lookup on dir1/file2 - should have been served from cache
21ab4e
                          but the cache time is T1 < T2, hence
21ab4e
                          cache is considered as invalid.
21ab4e
So, after CHILD_DOWN the right thing would be to clear the cache
21ab4e
and restart caching on that inode.
21ab4e
21ab4e
Solution:
21ab4e
Do not free nlc_ctx in nlc_clear_cache, but only in inode_forget().
21ab4e
The fixes for issue1 and issue2 are interleaved, hence sending them as
21ab4e
a single patch.
21ab4e
21ab4e
> Reviewed-on: https://review.gluster.org/17453
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Signed-off-by: Poornima G <pgurusid@redhat.com>
21ab4e
21ab4e
Change-Id: I83d8ed36c049a93567c6d7e63d045dc14ccbb397
21ab4e
BUG: 1450080
21ab4e
Signed-off-by: Poornima G <pgurusid@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/108892
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 tests/basic/nl-cache.t                             |   1 +
21ab4e
 xlators/mgmt/glusterd/src/glusterd-volume-set.c    |   1 -
21ab4e
 xlators/performance/nl-cache/src/nl-cache-helper.c | 167 +++++++++++++++------
21ab4e
 xlators/performance/nl-cache/src/nl-cache.c        |  21 ++-
21ab4e
 xlators/performance/nl-cache/src/nl-cache.h        |   1 -
21ab4e
 5 files changed, 142 insertions(+), 49 deletions(-)
21ab4e
21ab4e
diff --git a/tests/basic/nl-cache.t b/tests/basic/nl-cache.t
21ab4e
index f615328..2979a9b 100755
21ab4e
--- a/tests/basic/nl-cache.t
21ab4e
+++ b/tests/basic/nl-cache.t
21ab4e
@@ -16,6 +16,7 @@ EXPECT 'on' volinfo_field $V0 'performance.nl-cache'
21ab4e
 EXPECT '600' volinfo_field $V0 'features.cache-invalidation-timeout'
21ab4e
 EXPECT 'on' volinfo_field $V0 'features.cache-invalidation'
21ab4e
 EXPECT '50000' volinfo_field $V0  'network.inode-lru-limit'
21ab4e
+TEST $CLI volume set $V0 nl-cache-positive-entry on
21ab4e
 
21ab4e
 TEST $CLI volume start $V0;
21ab4e
 EXPECT 'Started' volinfo_field $V0 'Status';
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
index dd3f4fd..d5fa23b 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
21ab4e
@@ -3148,7 +3148,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
21ab4e
         },
21ab4e
         { .key         = "performance.nl-cache-positive-entry",
21ab4e
           .voltype     = "performance/nl-cache",
21ab4e
-          .value       = "on",
21ab4e
           .type        = DOC,
21ab4e
           .flags       = OPT_FLAG_CLIENT_OPT,
21ab4e
           .op_version  = GD_OP_VERSION_3_11_0,
21ab4e
diff --git a/xlators/performance/nl-cache/src/nl-cache-helper.c b/xlators/performance/nl-cache/src/nl-cache-helper.c
21ab4e
index edc8c16..2ca6bb9 100644
21ab4e
--- a/xlators/performance/nl-cache/src/nl-cache-helper.c
21ab4e
+++ b/xlators/performance/nl-cache/src/nl-cache-helper.c
21ab4e
@@ -67,6 +67,8 @@ int __nlc_add_to_lru (xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx);
21ab4e
 void nlc_remove_from_lru (xlator_t *this, inode_t *inode);
21ab4e
 void __nlc_inode_ctx_timer_delete (xlator_t *this, nlc_ctx_t *nlc_ctx);
21ab4e
 gf_boolean_t __nlc_search_ne (nlc_ctx_t *nlc_ctx, const char *name);
21ab4e
+void __nlc_free_pe (xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe);
21ab4e
+void __nlc_free_ne (xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne);
21ab4e
 
21ab4e
 static int32_t
21ab4e
 nlc_get_cache_timeout (xlator_t *this)
21ab4e
@@ -98,7 +100,8 @@ __nlc_is_cache_valid (xlator_t *this, nlc_ctx_t *nlc_ctx)
21ab4e
         }
21ab4e
         UNLOCK (&conf->lock);
21ab4e
 
21ab4e
-        if (last_val_time <= nlc_ctx->cache_time)
21ab4e
+        if ((last_val_time <= nlc_ctx->cache_time) &&
21ab4e
+            (nlc_ctx->cache_time != 0))
21ab4e
                 ret = _gf_true;
21ab4e
 out:
21ab4e
         return ret;
21ab4e
@@ -202,6 +205,88 @@ nlc_inode_ctx_get (xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
21ab4e
 }
21ab4e
 
21ab4e
 
21ab4e
+static void
21ab4e
+__nlc_inode_clear_entries (xlator_t *this, nlc_ctx_t *nlc_ctx)
21ab4e
+{
21ab4e
+        nlc_pe_t         *pe         = NULL;
21ab4e
+        nlc_pe_t         *tmp        = NULL;
21ab4e
+        nlc_ne_t         *ne         = NULL;
21ab4e
+        nlc_ne_t         *tmp1       = NULL;
21ab4e
+
21ab4e
+        if (!nlc_ctx)
21ab4e
+                goto out;
21ab4e
+
21ab4e
+        if (IS_PE_VALID (nlc_ctx->state))
21ab4e
+                list_for_each_entry_safe (pe, tmp, &nlc_ctx->pe, list) {
21ab4e
+                        __nlc_free_pe (this, nlc_ctx, pe);
21ab4e
+                }
21ab4e
+
21ab4e
+        if (IS_NE_VALID (nlc_ctx->state))
21ab4e
+                list_for_each_entry_safe (ne, tmp1, &nlc_ctx->ne, list) {
21ab4e
+                        __nlc_free_ne (this, nlc_ctx, ne);
21ab4e
+                }
21ab4e
+
21ab4e
+        nlc_ctx->cache_time = 0;
21ab4e
+        nlc_ctx->state = 0;
21ab4e
+        GF_ASSERT (nlc_ctx->cache_size == sizeof (*nlc_ctx));
21ab4e
+        GF_ASSERT (nlc_ctx->refd_inodes == 0);
21ab4e
+out:
21ab4e
+        return;
21ab4e
+}
21ab4e
+
21ab4e
+
21ab4e
+static void
21ab4e
+nlc_init_invalid_ctx (xlator_t *this, inode_t *inode, nlc_ctx_t *nlc_ctx)
21ab4e
+{
21ab4e
+        nlc_conf_t                 *conf   = NULL;
21ab4e
+        int                         ret    = -1;
21ab4e
+
21ab4e
+        conf = this->private;
21ab4e
+
21ab4e
+        LOCK (&nlc_ctx->lock);
21ab4e
+        {
21ab4e
+                if (__nlc_is_cache_valid (this, nlc_ctx))
21ab4e
+                        goto unlock;
21ab4e
+
21ab4e
+                /* The cache/nlc_ctx can be invalid for 2 reasons:
21ab4e
+                 * - Because of a child-down/timer expiry, cache is
21ab4e
+                 *   invalid but the nlc_ctx is not yet cleaned up.
21ab4e
+                 * - nlc_ctx is cleaned up, because of invalidations
21ab4e
+                 *   or lru prune etc.*/
21ab4e
+
21ab4e
+                /* If the cache is present but invalid, clear the cache and
21ab4e
+                 * reset the timer. */
21ab4e
+                __nlc_inode_clear_entries (this, nlc_ctx);
21ab4e
+
21ab4e
+                /* If timer is present, then it is already part of lru as well
21ab4e
+                 * Hence reset the timer and return.*/
21ab4e
+                if (nlc_ctx->timer) {
21ab4e
+                        gf_tw_mod_timer_pending (conf->timer_wheel,
21ab4e
+                                                 nlc_ctx->timer,
21ab4e
+                                                 conf->cache_timeout);
21ab4e
+                        time (&nlc_ctx->cache_time);
21ab4e
+                        goto unlock;
21ab4e
+                }
21ab4e
+
21ab4e
+                /* If timer was NULL, the nlc_ctx is already cleaned up,
21ab4e
+                 * and we need to start timer and add to lru, so that it is
21ab4e
+                 * ready to cache entries afresh */
21ab4e
+                ret = __nlc_inode_ctx_timer_start (this, inode, nlc_ctx);
21ab4e
+                if (ret < 0)
21ab4e
+                        goto unlock;
21ab4e
+
21ab4e
+                ret = __nlc_add_to_lru (this, inode, nlc_ctx);
21ab4e
+                if (ret < 0) {
21ab4e
+                        __nlc_inode_ctx_timer_delete (this, nlc_ctx);
21ab4e
+                        goto unlock;
21ab4e
+                }
21ab4e
+        }
21ab4e
+unlock:
21ab4e
+        UNLOCK (&nlc_ctx->lock);
21ab4e
+
21ab4e
+        return;
21ab4e
+}
21ab4e
+
21ab4e
 static nlc_ctx_t *
21ab4e
 nlc_inode_ctx_get_set (xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
21ab4e
                        nlc_pe_t **nlc_pe_p)
21ab4e
@@ -252,8 +337,10 @@ nlc_inode_ctx_get_set (xlator_t *this, inode_t *inode, nlc_ctx_t **nlc_ctx_p,
21ab4e
 unlock:
21ab4e
         UNLOCK (&inode->lock);
21ab4e
 
21ab4e
-        if (ret == 0 && nlc_ctx_p)
21ab4e
+        if (ret == 0 && nlc_ctx_p) {
21ab4e
                 *nlc_ctx_p = nlc_ctx;
21ab4e
+                nlc_init_invalid_ctx (this, inode, nlc_ctx);
21ab4e
+        }
21ab4e
 
21ab4e
         if (ret < 0 && nlc_ctx) {
21ab4e
                 LOCK_DESTROY (&nlc_ctx->lock);
21ab4e
@@ -261,6 +348,7 @@ unlock:
21ab4e
                 nlc_ctx = NULL;
21ab4e
                 goto out;
21ab4e
         }
21ab4e
+
21ab4e
 out:
21ab4e
         return nlc_ctx;
21ab4e
 }
21ab4e
@@ -342,14 +430,20 @@ static void
21ab4e
 nlc_cache_timeout_handler (struct gf_tw_timer_list *timer,
21ab4e
                            void *data, unsigned long calltime)
21ab4e
 {
21ab4e
-        nlc_timer_data_t *tmp = data;
21ab4e
-
21ab4e
-        nlc_inode_clear_cache (tmp->this, tmp->inode, NLC_TIMER_EXPIRED);
21ab4e
-        inode_unref (tmp->inode);
21ab4e
+        nlc_timer_data_t *tmp     = data;
21ab4e
+        nlc_ctx_t        *nlc_ctx = NULL;
21ab4e
 
21ab4e
-        GF_FREE (tmp);
21ab4e
-        GF_FREE (timer);
21ab4e
+        nlc_inode_ctx_get (tmp->this, tmp->inode, &nlc_ctx, NULL);
21ab4e
+        if (!nlc_ctx)
21ab4e
+                goto out;
21ab4e
 
21ab4e
+        /* Taking nlc_ctx->lock here will lead to deadlock, hence the
21ab4e
+         * cache is marked invalid outside of the lock, instead of calling
21ab4e
+         * clear_cache. Since cache_time is assigned outside of the lock,
21ab4e
+         * the value can be invalid for a short time; this may result in a
21ab4e
+         * false negative, which is better than a deadlock */
21ab4e
+        nlc_ctx->cache_time = 0;
21ab4e
+out:
21ab4e
         return;
21ab4e
 }
21ab4e
 
21ab4e
@@ -361,10 +455,14 @@ __nlc_inode_ctx_timer_delete (xlator_t *this, nlc_ctx_t *nlc_ctx)
21ab4e
 
21ab4e
         conf = this->private;
21ab4e
 
21ab4e
-        gf_tw_del_timer (conf->timer_wheel, nlc_ctx->timer);
21ab4e
+        if (nlc_ctx->timer)
21ab4e
+                gf_tw_del_timer (conf->timer_wheel, nlc_ctx->timer);
21ab4e
 
21ab4e
-        inode_unref (nlc_ctx->timer_data->inode);
21ab4e
-        GF_FREE (nlc_ctx->timer_data);
21ab4e
+        if (nlc_ctx->timer_data) {
21ab4e
+                inode_unref (nlc_ctx->timer_data->inode);
21ab4e
+                GF_FREE (nlc_ctx->timer_data);
21ab4e
+                nlc_ctx->timer_data = NULL;
21ab4e
+        }
21ab4e
 
21ab4e
         GF_FREE (nlc_ctx->timer);
21ab4e
         nlc_ctx->timer = NULL;
21ab4e
@@ -553,7 +651,7 @@ nlc_clear_all_cache (xlator_t *this)
21ab4e
 }
21ab4e
 
21ab4e
 
21ab4e
-static void
21ab4e
+void
21ab4e
 __nlc_free_pe (xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe)
21ab4e
 {
21ab4e
         uint64_t          pe_int      = 0;
21ab4e
@@ -584,7 +682,7 @@ __nlc_free_pe (xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_pe_t *pe)
21ab4e
 }
21ab4e
 
21ab4e
 
21ab4e
-static void
21ab4e
+void
21ab4e
 __nlc_free_ne (xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne)
21ab4e
 {
21ab4e
         nlc_conf_t                  *conf   = NULL;
21ab4e
@@ -606,49 +704,22 @@ __nlc_free_ne (xlator_t *this, nlc_ctx_t *nlc_ctx, nlc_ne_t *ne)
21ab4e
 void
21ab4e
 nlc_inode_clear_cache (xlator_t *this, inode_t *inode, int reason)
21ab4e
 {
21ab4e
-        uint64_t         nlc_ctx_int = 0;
21ab4e
         nlc_ctx_t        *nlc_ctx    = NULL;
21ab4e
-        nlc_pe_t         *pe         = NULL;
21ab4e
-        nlc_pe_t         *tmp        = NULL;
21ab4e
-        nlc_ne_t         *ne         = NULL;
21ab4e
-        nlc_ne_t         *tmp1       = NULL;
21ab4e
-        nlc_conf_t       *conf       = NULL;
21ab4e
-
21ab4e
-        conf = this->private;
21ab4e
 
21ab4e
-        inode_ctx_reset0 (inode, this, &nlc_ctx_int);
21ab4e
-        if (nlc_ctx_int == 0)
21ab4e
+        nlc_inode_ctx_get (this, inode, &nlc_ctx, NULL);
21ab4e
+        if (!nlc_ctx)
21ab4e
                 goto out;
21ab4e
 
21ab4e
-        nlc_ctx = (void *) (long) nlc_ctx_int;
21ab4e
-
21ab4e
-        if (reason != NLC_LRU_PRUNE)
21ab4e
-                nlc_remove_from_lru (this, inode);
21ab4e
-
21ab4e
         LOCK (&nlc_ctx->lock);
21ab4e
         {
21ab4e
-                if (reason != NLC_TIMER_EXPIRED)
21ab4e
-                        __nlc_inode_ctx_timer_delete (this, nlc_ctx);
21ab4e
-
21ab4e
-                if (IS_PE_VALID (nlc_ctx->state))
21ab4e
-                        list_for_each_entry_safe (pe, tmp, &nlc_ctx->pe, list) {
21ab4e
-                                __nlc_free_pe (this, nlc_ctx, pe);
21ab4e
-                        }
21ab4e
+                __nlc_inode_ctx_timer_delete (this, nlc_ctx);
21ab4e
 
21ab4e
-                if (IS_NE_VALID (nlc_ctx->state))
21ab4e
-                        list_for_each_entry_safe (ne, tmp1, &nlc_ctx->ne, list) {
21ab4e
-                                __nlc_free_ne (this, nlc_ctx, ne);
21ab4e
-                        }
21ab4e
+                __nlc_inode_clear_entries (this, nlc_ctx);
21ab4e
         }
21ab4e
         UNLOCK (&nlc_ctx->lock);
21ab4e
 
21ab4e
-        LOCK_DESTROY (&nlc_ctx->lock);
21ab4e
-
21ab4e
-        nlc_ctx->cache_size -= sizeof (*nlc_ctx);
21ab4e
-        GF_ASSERT (nlc_ctx->cache_size == 0);
21ab4e
-        GF_FREE (nlc_ctx);
21ab4e
-
21ab4e
-        GF_ATOMIC_SUB (conf->current_cache_size, sizeof (*nlc_ctx));
21ab4e
+        if (reason != NLC_LRU_PRUNE)
21ab4e
+                nlc_remove_from_lru (this, inode);
21ab4e
 
21ab4e
 out:
21ab4e
         return;
21ab4e
@@ -864,10 +935,14 @@ nlc_dir_remove_pe (xlator_t *this, inode_t *parent, inode_t *entry_ino,
21ab4e
 
21ab4e
         LOCK (&nlc_ctx->lock);
21ab4e
         {
21ab4e
+                if (!__nlc_is_cache_valid (this, nlc_ctx))
21ab4e
+                        goto unlock;
21ab4e
+
21ab4e
                 __nlc_del_pe (this, nlc_ctx, entry_ino, name, multilink);
21ab4e
                 __nlc_add_ne (this, nlc_ctx, name);
21ab4e
                 __nlc_set_dir_state (nlc_ctx, NLC_NE_VALID);
21ab4e
         }
21ab4e
+unlock:
21ab4e
         UNLOCK (&nlc_ctx->lock);
21ab4e
 out:
21ab4e
         return;
21ab4e
diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
21ab4e
index a72f039..7dad8d9 100644
21ab4e
--- a/xlators/performance/nl-cache/src/nl-cache.c
21ab4e
+++ b/xlators/performance/nl-cache/src/nl-cache.c
21ab4e
@@ -473,12 +473,17 @@ nlc_invalidate (xlator_t *this, void *data)
21ab4e
         inode_t                             *parent2    = NULL;
21ab4e
         int                                  ret        = 0;
21ab4e
         inode_table_t                       *itable     = NULL;
21ab4e
+        nlc_conf_t                          *conf       = NULL;
21ab4e
 
21ab4e
         up_data = (struct gf_upcall *)data;
21ab4e
 
21ab4e
         if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
21ab4e
                 goto out;
21ab4e
 
21ab4e
+        conf = this->private;
21ab4e
+        if (!conf)
21ab4e
+                 goto out;
21ab4e
+
21ab4e
         up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
21ab4e
 
21ab4e
         /*TODO: Add he inodes found as a member in gf_upcall_cache_invalidation
21ab4e
@@ -520,6 +525,9 @@ nlc_invalidate (xlator_t *this, void *data)
21ab4e
                 nlc_inode_clear_cache (this, parent1, NLC_NONE);
21ab4e
         if (parent2)
21ab4e
                 nlc_inode_clear_cache (this, parent2, NLC_NONE);
21ab4e
+
21ab4e
+        GF_ATOMIC_INC (conf->nlc_counter.nlc_invals);
21ab4e
+
21ab4e
 out:
21ab4e
         if (inode)
21ab4e
                 inode_unref (inode);
21ab4e
@@ -568,12 +576,23 @@ notify (xlator_t *this, int event, void *data, ...)
21ab4e
 static int32_t
21ab4e
 nlc_forget (xlator_t *this, inode_t *inode)
21ab4e
 {
21ab4e
-        uint64_t pe_int = 0;
21ab4e
+        uint64_t         pe_int      = 0;
21ab4e
+        uint64_t         nlc_ctx_int = 0;
21ab4e
+        nlc_ctx_t       *nlc_ctx     = NULL;
21ab4e
+        nlc_conf_t      *conf        = NULL;
21ab4e
+
21ab4e
+        conf = this->private;
21ab4e
 
21ab4e
         inode_ctx_reset1 (inode, this, &pe_int);
21ab4e
         GF_ASSERT (pe_int == 0);
21ab4e
 
21ab4e
         nlc_inode_clear_cache (this, inode, NLC_NONE);
21ab4e
+        inode_ctx_reset0 (inode, this, &nlc_ctx_int);
21ab4e
+        nlc_ctx = (void *) (long) nlc_ctx_int;
21ab4e
+        if (nlc_ctx) {
21ab4e
+                GF_FREE (nlc_ctx);
21ab4e
+                GF_ATOMIC_SUB (conf->current_cache_size, sizeof (*nlc_ctx));
21ab4e
+        }
21ab4e
 
21ab4e
         return 0;
21ab4e
 }
21ab4e
diff --git a/xlators/performance/nl-cache/src/nl-cache.h b/xlators/performance/nl-cache/src/nl-cache.h
21ab4e
index 10ec022..3bd7c83 100644
21ab4e
--- a/xlators/performance/nl-cache/src/nl-cache.h
21ab4e
+++ b/xlators/performance/nl-cache/src/nl-cache.h
21ab4e
@@ -44,7 +44,6 @@
21ab4e
 
21ab4e
 enum nlc_cache_clear_reason {
21ab4e
         NLC_NONE = 0,
21ab4e
-        NLC_TIMER_EXPIRED,
21ab4e
         NLC_LRU_PRUNE,
21ab4e
 };
21ab4e
 
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e