Blob Blame History Raw
From 6d168f9901ed02def8c53e9a4915b421ff1294f7 Mon Sep 17 00:00:00 2001
From: Poornima G <pgurusid@redhat.com>
Date: Tue, 23 Aug 2016 18:15:22 +0530
Subject: [PATCH 128/141] dht, md-cache, upcall: Add invalidation of IATT when the layout changes

Issue:
dht_layout is built only as a part of lookup. The layout can be
modified by the rebalance process. Since every IO fop is preceded
by a lookup, stale layout issues are rare. But with the
enhancements for aggressive caching of stats in md-cache, the
number of lookups will drop, exposing the stale layout issue more
often.

Solution:
Since stale layout is already an issue in dht, there is already
a plan to fix it at the dht layer, but that fix is not currently
planned for any release. Until that fix comes out, we can have
a workaround where the upcall sends a notification to md-cache
when a layout xattr is changed. As a part of the layout change
notification, the existing cache is invalidated and the next lookup
will fetch the latest layout.

This is not a foolproof solution, as there is still a window between
the layout change and the next lookup (after invalidation of stat)
during which the layout will be stale. But until the final fix comes
in, this reduces the stale layout window.

Change-Id: Iacf871a38b35880c1fc0bc68fe7ce291265e71d4
BUG: 1284873
Signed-off-by: Poornima G <pgurusid@redhat.com>
Reviewed-on: http://review.gluster.org/15300
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/87041
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Tested-by: Rajesh Joseph <rjoseph@redhat.com>
---
 libglusterfs/src/glusterfs.h                |    4 ++++
 xlators/cluster/dht/src/dht-common.c        |   24 +++++++++++++++++++++++-
 xlators/performance/md-cache/src/md-cache.c |   12 +++++++-----
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index af59c73..8206e23 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -283,6 +283,10 @@
 #define GF_LK_ADVISORY 0
 #define GF_LK_MANDATORY 1
 
+/* md-cache keys */
+#define MDC_INVALIDATE_IATT "mdc.invalidate.iatt"
+#define MDC_INVALIDATE_XATT "mdc.invalidate.xatt"
+
 const char *fop_enum_to_pri_string (glusterfs_fop_t fop);
 const char *fop_enum_to_string (glusterfs_fop_t fop);
 
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 1906376..a160d3f 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -19,6 +19,7 @@
 #include "byte-order.h"
 #include "glusterfs-acl.h"
 #include "quota-common-utils.h"
+#include "upcall-utils.h"
 
 #include <sys/time.h>
 #include <libgen.h>
@@ -8601,6 +8602,11 @@ dht_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
         call_cnt        = conf->subvolume_cnt;
         local->call_cnt = call_cnt;
 
+        if (xdata) {
+                if (dict_set_int8 (xdata, conf->xattr_name, 0) < 0)
+                        goto err;
+        }
+
         for (i = 0; i < call_cnt; i++) {
                 STACK_WIND (frame, dht_ipc_cbk, conf->subvolumes[i],
                             conf->subvolumes[i]->fops->ipc, op, xdata);
@@ -8663,6 +8669,8 @@ dht_notify (xlator_t *this, int event, void *data, ...)
         dict_t                  *output = NULL;
         va_list                  ap;
         dht_methods_t           *methods = NULL;
+        struct gf_upcall        *up_data = NULL;
+        struct gf_upcall_cache_invalidation *up_ci = NULL;
 
         conf = this->private;
         GF_VALIDATE_OR_GOTO (this->name, conf, out);
@@ -8831,7 +8839,21 @@ unlock:
                 return ret;
                 break;
         }
-
+        case GF_EVENT_UPCALL:
+                up_data = (struct gf_upcall *)data;
+                if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+                        break;
+                up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+                /* Since md-cache will be aggressively filtering lookups,
+                 * the stale layout issue will be more pronounced. Hence
+                 * when a layout xattr is changed by the rebalance process
+                 * notify all the md-cache clients to invalidate the existing
+                 * stat cache and send the lookup next time*/
+                if (up_ci->dict && dict_get (up_ci->dict, conf->xattr_name))
+                        ret = dict_set_int8 (up_ci->dict, MDC_INVALIDATE_IATT , 0);
+                propagate = 1;
+                break;
         default:
                 propagate = 1;
                 break;
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index a3f13ff..c5d99ba 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -808,10 +808,9 @@ out:
 }
 
 
-int
+void
 mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode)
 {
-        int              ret = -1;
         struct md_cache *mdc = NULL;
 
         if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
@@ -824,7 +823,7 @@ mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode)
         UNLOCK (&mdc->lock);
 
 out:
-        return ret;
+        return;
 }
 
 
@@ -2539,8 +2538,11 @@ mdc_invalidate (xlator_t *this, void *data)
         }
 
         if (up_ci->flags & IATT_UPDATE_FLAGS) {
-                ret = mdc_inode_iatt_set_validate (this, inode, NULL,
-                                                   &up_ci->stat);
+                if (up_ci->dict && dict_get (up_ci->dict, MDC_INVALIDATE_IATT))
+                        mdc_inode_iatt_invalidate (this, inode);
+                else
+                        ret = mdc_inode_iatt_set_validate (this, inode, NULL,
+                                                           &up_ci->stat);
                 /* one of the scenarios where ret < 0 is when this invalidate
                  * is older than the current stat, in that case do not
                  * update the xattrs as well
-- 
1.7.1