3604df
From 7038b342cd1406bd432b828b8ea6804e9d061853 Mon Sep 17 00:00:00 2001
3604df
From: Poornima G <pgurusid@redhat.com>
3604df
Date: Mon, 21 Nov 2016 19:57:08 +0530
3604df
Subject: [PATCH 205/206] libglusterfs: Fix a read hang
3604df
3604df
Backport of http://review.gluster.org/15923
3604df
3604df
Issue:
3604df
=====
3604df
In certain cases, there was no unwind of read
3604df
from read-ahead xlator, thus resulting in hang.
3604df
3604df
RCA:
3604df
====
3604df
In certain cases, ioc_readv() issues STACK_WIND_TAIL() instead
3604df
of STACK_WIND(). One such case is when inode_ctx for that file
3604df
is not present (can happen if readdirp was called, and populates
3604df
md-cache and serves all the lookups from cache).
3604df
3604df
Consider the following graph:
3604df
...
3604df
io-cache (parent)
3604df
   |
3604df
readdir-ahead
3604df
   |
3604df
read-ahead
3604df
...
3604df
3604df
Below is the code snippet of ioc_readv calling STACK_WIND_TAIL:
3604df
ioc_readv()
3604df
{
3604df
...
3604df
 if (!inode_ctx)
3604df
   STACK_WIND_TAIL (frame, FIRST_CHILD (frame->this),
3604df
                    FIRST_CHILD (frame->this)->fops->readv, fd,
3604df
                    size, offset, flags, xdata);
3604df
   /* Ideally, this stack_wind should wind to readdir-ahead:readv()
3604df
      but it winds to read-ahead:readv(). See below for
3604df
      explaination.
3604df
    */
3604df
...
3604df
}
3604df
3604df
STACK_WIND_TAIL (frame, obj, fn, ...)
3604df
{
3604df
  frame->this = obj;
3604df
  /* for the above mentioned graph, frame->this will be readdir-ahead
3604df
   * frame->this = FIRST_CHILD (frame->this) i.e. readdir-ahead, which
3604df
   * is as expected
3604df
   */
3604df
  ...
3604df
  THIS = obj;
3604df
  /* THIS will be read-ahead instead of readdir-ahead!, as obj expands
3604df
   * to "FIRST_CHILD (frame->this)" and frame->this was pointing
3604df
   * to readdir-ahead in the previous statement.
3604df
   */
3604df
  ...
3604df
  fn (frame, obj, params);
3604df
  /* fn will call read-ahead:readv() instead of readdir-ahead:readv()!
3604df
   * as fn expands to "FIRST_CHILD (frame->this)->fops->readv" and
3604df
   * frame->this was pointing ro readdir-ahead in the first statement
3604df
   */
3604df
  ...
3604df
}
3604df
3604df
Thus, the readdir-ahead's readv() implementation will be skipped, and
3604df
ra_readv() will be called with frame->this = "readdir-ahead" and
3604df
this = "read-ahead". This can lead to corruption / hang / other problems.
3604df
But in this perticular case, when 'frame->this' and 'this' passed
3604df
to ra_readv() doesn't match, it causes ra_readv() to call ra_readv()
3604df
again!. Thus the logic of read-ahead readv() falls apart and leads to
3604df
hang.
3604df
3604df
Solution:
3604df
=========
3604df
Modify STACK_WIND_TAIL() as:
3604df
STACK_WIND_TAIL (frame, obj, fn, ...)
3604df
{
3604df
  next_xl = obj /* resolve obj as the variables passed in obj macro
3604df
                   can be overwritten in the further instrucions */
3604df
  next_xl_fn = fn /* resolve fn and store in a tmp variable, before
3604df
                     modifying any variables */
3604df
  frame->this = next_xl;
3604df
  ...
3604df
  THIS = next_xl;
3604df
  ...
3604df
  next_xl_fn (frame, next_xl, params);
3604df
  ...
3604df
}
3604df
3604df
As a part of http://review.gluster.org/15901/ the caller io-cache
3604df
was fixed.
3604df
3604df
>Reviewed-on: http://review.gluster.org/15923
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
3604df
3604df
BUG: 1392299
3604df
Change-Id: Ie662ac8f18fa16909376f1e59387bc5b886bd0f9
3604df
Signed-off-by: Poornima G <pgurusid@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/91496
3604df
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
3604df
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
3604df
---
3604df
 libglusterfs/src/stack.h | 8 +++++---
3604df
 1 file changed, 5 insertions(+), 3 deletions(-)
3604df
3604df
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
3604df
index 2899be9..393fdac 100644
3604df
--- a/libglusterfs/src/stack.h
3604df
+++ b/libglusterfs/src/stack.h
3604df
@@ -275,17 +275,19 @@ STACK_RESET (call_stack_t *stack)
3604df
 #define STACK_WIND_TAIL(frame, obj, fn, params ...)                     \
3604df
         do {                                                            \
3604df
                 xlator_t     *old_THIS = NULL;                          \
3604df
+                xlator_t     *next_xl = obj;                            \
3604df
+                typeof(fn)    next_xl_fn = fn;                          \
3604df
                                                                         \
3604df
-                frame->this = obj;                                      \
3604df
+                frame->this = next_xl;                                  \
3604df
                 frame->wind_to = #fn;                                   \
3604df
                 old_THIS = THIS;                                        \
3604df
-                THIS = obj;                                             \
3604df
+                THIS = next_xl;                                         \
3604df
                 gf_msg_trace ("stack-trace", 0,                         \
3604df
                               "stack-address: %p, "                     \
3604df
                               "winding from %s to %s",                  \
3604df
                               frame->root, old_THIS->name,              \
3604df
                               THIS->name);                              \
3604df
-                fn (frame, obj, params);                                \
3604df
+                next_xl_fn (frame, next_xl, params);                    \
3604df
                 THIS = old_THIS;                                        \
3604df
         } while (0)
3604df
 
3604df
-- 
3604df
2.9.3
3604df