3604df
From 6d168f9901ed02def8c53e9a4915b421ff1294f7 Mon Sep 17 00:00:00 2001
3604df
From: Poornima G <pgurusid@redhat.com>
3604df
Date: Tue, 23 Aug 2016 18:15:22 +0530
3604df
Subject: [PATCH 128/141] dht, md-cache, upcall: Add invalidation of IATT when the layout changes
3604df
3604df
Issue:
3604df
dht_layout is built as a part of lookup only. The layout can be
3604df
modified by rebalance process. Since every IO fop is preceded
3604df
by a lookup, there are very few issues of stale layout. But
3604df
with enhancements of aggressive caching of stats in md-cache,
3604df
lookups will become less frequent, exposing the stale layout issue more often.
3604df
3604df
Solution:
3604df
Since stale layout is already an issue on dht, there is already
3604df
a plan to fix this at the dht layer, but this fix is not currently
3604df
planned for any release. Until this fix comes out, we can have
3604df
a workaround where the upcall sends a notification to md-cache
3604df
when a layout xattr is changed. As part of the layout change notification,
3604df
the existing cache is invalidated and the next lookup will fetch the
3604df
latest layout.
3604df
3604df
This is not a foolproof solution, as there is still a window between the layout change
3604df
and the next lookup (after invalidation of stat), where there will be stale
3604df
layout. But until the final fix comes in, this reduces the stale layout
3604df
window.
3604df
3604df
Change-Id: Iacf871a38b35880c1fc0bc68fe7ce291265e71d4
3604df
BUG: 1284873
3604df
Signed-off-by: Poornima G <pgurusid@redhat.com>
3604df
Reviewed-on: http://review.gluster.org/15300
3604df
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/87041
3604df
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
3604df
Tested-by: Rajesh Joseph <rjoseph@redhat.com>
3604df
---
3604df
 libglusterfs/src/glusterfs.h                |    4 ++++
3604df
 xlators/cluster/dht/src/dht-common.c        |   24 +++++++++++++++++++++++-
3604df
 xlators/performance/md-cache/src/md-cache.c |   12 +++++++-----
3604df
 3 files changed, 34 insertions(+), 6 deletions(-)
3604df
3604df
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
3604df
index af59c73..8206e23 100644
3604df
--- a/libglusterfs/src/glusterfs.h
3604df
+++ b/libglusterfs/src/glusterfs.h
3604df
@@ -283,6 +283,10 @@
3604df
 #define GF_LK_ADVISORY 0
3604df
 #define GF_LK_MANDATORY 1
3604df
 
3604df
+/* md-cache keys */
3604df
+#define MDC_INVALIDATE_IATT "mdc.invalidate.iatt"
3604df
+#define MDC_INVALIDATE_XATT "mdc.invalidate.xatt"
3604df
+
3604df
 const char *fop_enum_to_pri_string (glusterfs_fop_t fop);
3604df
 const char *fop_enum_to_string (glusterfs_fop_t fop);
3604df
 
3604df
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
3604df
index 1906376..a160d3f 100644
3604df
--- a/xlators/cluster/dht/src/dht-common.c
3604df
+++ b/xlators/cluster/dht/src/dht-common.c
3604df
@@ -19,6 +19,7 @@
3604df
 #include "byte-order.h"
3604df
 #include "glusterfs-acl.h"
3604df
 #include "quota-common-utils.h"
3604df
+#include "upcall-utils.h"
3604df
 
3604df
 #include <sys/time.h>
3604df
 #include <libgen.h>
3604df
@@ -8601,6 +8602,11 @@ dht_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
3604df
         call_cnt        = conf->subvolume_cnt;
3604df
         local->call_cnt = call_cnt;
3604df
 
3604df
+        if (xdata) {
3604df
+                if (dict_set_int8 (xdata, conf->xattr_name, 0) < 0)
3604df
+                        goto err;
3604df
+        }
3604df
+
3604df
         for (i = 0; i < call_cnt; i++) {
3604df
                 STACK_WIND (frame, dht_ipc_cbk, conf->subvolumes[i],
3604df
                             conf->subvolumes[i]->fops->ipc, op, xdata);
3604df
@@ -8663,6 +8669,8 @@ dht_notify (xlator_t *this, int event, void *data, ...)
3604df
         dict_t                  *output = NULL;
3604df
         va_list                  ap;
3604df
         dht_methods_t           *methods = NULL;
3604df
+        struct gf_upcall        *up_data = NULL;
3604df
+        struct gf_upcall_cache_invalidation *up_ci = NULL;
3604df
 
3604df
         conf = this->private;
3604df
         GF_VALIDATE_OR_GOTO (this->name, conf, out);
3604df
@@ -8831,7 +8839,21 @@ unlock:
3604df
                 return ret;
3604df
                 break;
3604df
         }
3604df
-
3604df
+        case GF_EVENT_UPCALL:
3604df
+                up_data = (struct gf_upcall *)data;
3604df
+                if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
3604df
+                        break;
3604df
+                up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
3604df
+
3604df
+                /* Since md-cache will be aggressively filtering lookups,
3604df
+                 * the stale layout issue will be more pronounced. Hence
3604df
+                 * when a layout xattr is changed by the rebalance process
3604df
+                 * notify all the md-cache clients to invalidate the existing
3604df
+                 * stat cache and send the lookup next time*/
3604df
+                if (up_ci->dict && dict_get (up_ci->dict, conf->xattr_name))
3604df
+                        ret = dict_set_int8 (up_ci->dict, MDC_INVALIDATE_IATT , 0);
3604df
+                propagate = 1;
3604df
+                break;
3604df
         default:
3604df
                 propagate = 1;
3604df
                 break;
3604df
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
3604df
index a3f13ff..c5d99ba 100644
3604df
--- a/xlators/performance/md-cache/src/md-cache.c
3604df
+++ b/xlators/performance/md-cache/src/md-cache.c
3604df
@@ -808,10 +808,9 @@ out:
3604df
 }
3604df
 
3604df
 
3604df
-int
3604df
+void
3604df
 mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode)
3604df
 {
3604df
-        int              ret = -1;
3604df
         struct md_cache *mdc = NULL;
3604df
 
3604df
         if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
3604df
@@ -824,7 +823,7 @@ mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode)
3604df
         UNLOCK (&mdc->lock);
3604df
 
3604df
 out:
3604df
-        return ret;
3604df
+        return;
3604df
 }
3604df
 
3604df
 
3604df
@@ -2539,8 +2538,11 @@ mdc_invalidate (xlator_t *this, void *data)
3604df
         }
3604df
 
3604df
         if (up_ci->flags & IATT_UPDATE_FLAGS) {
3604df
-                ret = mdc_inode_iatt_set_validate (this, inode, NULL,
3604df
-                                                   &up_ci->stat);
3604df
+                if (up_ci->dict && dict_get (up_ci->dict, MDC_INVALIDATE_IATT))
3604df
+                        mdc_inode_iatt_invalidate (this, inode);
3604df
+                else
3604df
+                        ret = mdc_inode_iatt_set_validate (this, inode, NULL,
3604df
+                                                           &up_ci->stat);
3604df
                 /* one of the scenarios where ret < 0 is when this invalidate
3604df
                  * is older than the current stat, in that case do not
3604df
                  * update the xattrs as well
3604df
-- 
3604df
1.7.1
3604df