|
|
21ab4e |
From 4265b6bb74aebe4723a6748d29fd5d728f1ea169 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
21ab4e |
Date: Wed, 28 Jun 2017 09:10:53 +0530
|
|
|
21ab4e |
Subject: [PATCH 534/539] features/shard: Remove ctx from LRU in shard_forget
|
|
|
21ab4e |
|
|
|
21ab4e |
Problem:
|
|
|
21ab4e |
There is a race when the following two commands are executed on the mount in
|
|
|
21ab4e |
parallel from two different terminals on a sharded volume,
|
|
|
21ab4e |
which leads to use-after-free.
|
|
|
21ab4e |
|
|
|
21ab4e |
Terminal-1:
|
|
|
21ab4e |
while true; do dd if=/dev/zero of=file1 bs=1M count=4; done
|
|
|
21ab4e |
|
|
|
21ab4e |
Terminal-2:
|
|
|
21ab4e |
while true; do cat file1 > /dev/null; done
|
|
|
21ab4e |
|
|
|
21ab4e |
In the normal case this is the life-cycle of a shard-inode
|
|
|
21ab4e |
1) Shard is added to LRU when it is first looked-up
|
|
|
21ab4e |
2) For every operation on the shard it is moved up in LRU
|
|
|
21ab4e |
3) When "unlink of the shard"/"LRU limit is hit" happens it is removed from LRU
|
|
|
21ab4e |
|
|
|
21ab4e |
But we are seeing a race where the inode stays in Shard LRU even after it is
|
|
|
21ab4e |
forgotten which leads to Use-after-free and then some memory-corruptions.
|
|
|
21ab4e |
|
|
|
21ab4e |
These are the steps:
|
|
|
21ab4e |
1) Shard is added to LRU when it is first looked-up
|
|
|
21ab4e |
2) For every operation on the shard it is moved up in LRU
|
|
|
21ab4e |
|
|
|
21ab4e |
Reader-handler Truncate-handler
|
|
|
21ab4e |
1) Reader handler needs shard-x to be read. 1) Truncate has just deleted shard-x
|
|
|
21ab4e |
2) In shard_common_resolve_shards(), it does
|
|
|
21ab4e |
inode_resolve() and that leads to
|
|
|
21ab4e |
a hit in LRU, so it is going to call
|
|
|
21ab4e |
__shard_update_shards_inode_list() to move the
|
|
|
21ab4e |
inode to top of LRU
|
|
|
21ab4e |
2) shard-x gets unlinked from the itable
|
|
|
21ab4e |
and inode_forget(inode, 0) is called
|
|
|
21ab4e |
to make sure the inode can be purged
|
|
|
21ab4e |
upon last unref
|
|
|
21ab4e |
3) when __shard_update_shards_inode_list() is
|
|
|
21ab4e |
called it finds that the inode is not in LRU
|
|
|
21ab4e |
so it adds it back to the LRU-list
|
|
|
21ab4e |
|
|
|
21ab4e |
Both these operations complete and call inode_unref(shard-x) which leads to the inode
|
|
|
21ab4e |
getting freed and forgotten, even when it is in Shard LRU list. When more inodes are
|
|
|
21ab4e |
added to LRU, use-after-free will happen and it leads to undefined behaviors.
|
|
|
21ab4e |
|
|
|
21ab4e |
Fix:
|
|
|
21ab4e |
I see that the inode can be removed from LRU even by the protocol layers like gfapi/gNFS
|
|
|
21ab4e |
when LRU limit is reached. So it is better to add a check in shard_forget() to remove itself
|
|
|
21ab4e |
from LRU list if it exists.
|
|
|
21ab4e |
|
|
|
21ab4e |
>BUG: 1466037
|
|
|
21ab4e |
>Change-Id: Ia79c0c5c9d5febc56c41ddb12b5daf03e5281638
|
|
|
21ab4e |
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
21ab4e |
>Reviewed-on: https://review.gluster.org/17644
|
|
|
21ab4e |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
>Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
BUG: 1464453
|
|
|
21ab4e |
Change-Id: Ia79c0c5c9d5febc56c41ddb12b5daf03e5281638
|
|
|
21ab4e |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/110565
|
|
|
21ab4e |
---
|
|
|
21ab4e |
tests/bugs/shard/parallel-truncate-read.t | 48 +++++++++++++++++++++++++++++++
|
|
|
21ab4e |
xlators/features/shard/src/shard.c | 13 +++++++++
|
|
|
21ab4e |
2 files changed, 61 insertions(+)
|
|
|
21ab4e |
create mode 100644 tests/bugs/shard/parallel-truncate-read.t
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/bugs/shard/parallel-truncate-read.t b/tests/bugs/shard/parallel-truncate-read.t
|
|
|
21ab4e |
new file mode 100644
|
|
|
21ab4e |
index 0000000..4de876f
|
|
|
21ab4e |
--- /dev/null
|
|
|
21ab4e |
+++ b/tests/bugs/shard/parallel-truncate-read.t
|
|
|
21ab4e |
@@ -0,0 +1,48 @@
|
|
|
21ab4e |
+#!/bin/bash
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+#This test will crash if shard's LRU contains a shard's inode even after the
|
|
|
21ab4e |
+#inode is forgotten. Minimum time for crash to happen I saw was 180 seconds
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+. $(dirname $0)/../../include.rc
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+function keep_writing {
|
|
|
21ab4e |
+ cd $M0;
|
|
|
21ab4e |
+ while [ -f /tmp/parallel-truncate-read ]
|
|
|
21ab4e |
+ do
|
|
|
21ab4e |
+ dd if=/dev/zero of=file1 bs=1M count=16
|
|
|
21ab4e |
+ done
|
|
|
21ab4e |
+ cd
|
|
|
21ab4e |
+}
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+function keep_reading {
|
|
|
21ab4e |
+ cd $M0;
|
|
|
21ab4e |
+ while [ -f /tmp/parallel-truncate-read ]
|
|
|
21ab4e |
+ do
|
|
|
21ab4e |
+ cat file1 > /dev/null
|
|
|
21ab4e |
+ done
|
|
|
21ab4e |
+ cd
|
|
|
21ab4e |
+}
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+cleanup;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST touch /tmp/parallel-truncate-read
|
|
|
21ab4e |
+TEST glusterd
|
|
|
21ab4e |
+TEST pidof glusterd
|
|
|
21ab4e |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
|
21ab4e |
+TEST $CLI volume set $V0 features.shard on
|
|
|
21ab4e |
+TEST $CLI volume set $V0 performance.quick-read off
|
|
|
21ab4e |
+TEST $CLI volume set $V0 performance.io-cache off
|
|
|
21ab4e |
+TEST $CLI volume set $V0 performance.stat-prefetch off
|
|
|
21ab4e |
+TEST $CLI volume set $V0 performance.read-ahead off
|
|
|
21ab4e |
+TEST $CLI volume start $V0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
|
|
|
21ab4e |
+keep_writing &
|
|
|
21ab4e |
+keep_reading &
|
|
|
21ab4e |
+sleep 180
|
|
|
21ab4e |
+TEST rm -f /tmp/parallel-truncate-read
|
|
|
21ab4e |
+wait
|
|
|
21ab4e |
+#test that the mount is operational
|
|
|
21ab4e |
+TEST stat $M0
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+cleanup;
|
|
|
21ab4e |
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
|
21ab4e |
index 141c42f..b97608f 100644
|
|
|
21ab4e |
--- a/xlators/features/shard/src/shard.c
|
|
|
21ab4e |
+++ b/xlators/features/shard/src/shard.c
|
|
|
21ab4e |
@@ -5018,13 +5018,26 @@ shard_forget (xlator_t *this, inode_t *inode)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
uint64_t ctx_uint = 0;
|
|
|
21ab4e |
shard_inode_ctx_t *ctx = NULL;
|
|
|
21ab4e |
+ shard_priv_t *priv = NULL;
|
|
|
21ab4e |
|
|
|
21ab4e |
+ priv = this->private;
|
|
|
21ab4e |
inode_ctx_del (inode, this, &ctx_uint);
|
|
|
21ab4e |
if (!ctx_uint)
|
|
|
21ab4e |
return 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
ctx = (shard_inode_ctx_t *)ctx_uint;
|
|
|
21ab4e |
|
|
|
21ab4e |
+ /* When LRU limit reaches inode will be forcefully removed from the
|
|
|
21ab4e |
+ * table, inode needs to be removed from LRU of shard as well.
|
|
|
21ab4e |
+ */
|
|
|
21ab4e |
+ if (!list_empty (&ctx->ilist)) {
|
|
|
21ab4e |
+ LOCK(&priv->lock);
|
|
|
21ab4e |
+ {
|
|
|
21ab4e |
+ list_del_init (&ctx->ilist);
|
|
|
21ab4e |
+ priv->inode_count--;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ UNLOCK(&priv->lock);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
GF_FREE (ctx);
|
|
|
21ab4e |
|
|
|
21ab4e |
return 0;
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|