|
|
d1681e |
From 4623ebe9a69a5284565b4ad253084a1236478988 Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Soumya Koduri <skoduri@redhat.com>
|
|
|
d1681e |
Date: Fri, 3 Nov 2017 15:41:34 +0530
|
|
|
d1681e |
Subject: [PATCH 243/260] timer: Fix possible race during cleanup
|
|
|
d1681e |
|
|
|
d1681e |
As mentioned in bug1509189, there is a possible race
|
|
|
d1681e |
between gf_timer_cancel(), gf_timer_proc() and
|
|
|
d1681e |
gf_timer_registry_destroy() leading to use_after_free.
|
|
|
d1681e |
|
|
|
d1681e |
Problem:
|
|
|
d1681e |
|
|
|
d1681e |
1) gf_timer_proc() is called, locks reg, and gets an event.
|
|
|
d1681e |
It unlocks reg, and calls the callback.
|
|
|
d1681e |
|
|
|
d1681e |
2) Meanwhile gf_timer_registry_destroy() is called, and removes
|
|
|
d1681e |
reg from ctx, and joins on gf_timer_proc().
|
|
|
d1681e |
|
|
|
d1681e |
3) gf_timer_call_cancel() is called on the event being
|
|
|
d1681e |
processed. It cannot find reg (since it's been removed from reg),
|
|
|
d1681e |
so it frees event.
|
|
|
d1681e |
|
|
|
d1681e |
4) the callback returns into gf_timer_proc(), and it tries to free
|
|
|
d1681e |
event, but it's already free, so double free.
|
|
|
d1681e |
|
|
|
d1681e |
Solution:
|
|
|
d1681e |
The fix is to bail out in gf_timer_cancel() when registry
|
|
|
d1681e |
is not found. The logic behind this is that, gf_timer_cancel()
|
|
|
d1681e |
is called only on any existing event. That means there was a valid
|
|
|
d1681e |
registry earlier while creating that event. And the only reason
|
|
|
d1681e |
we cannot find that registry now is that it must have got set to
|
|
|
d1681e |
NULL when context cleanup is started.
|
|
|
d1681e |
Since gf_timer_proc() takes care of releasing all the remaining
|
|
|
d1681e |
events active on that registry, it seems safe to bail out
|
|
|
d1681e |
in gf_timer_cancel().
|
|
|
d1681e |
|
|
|
d1681e |
>Change-Id: Ia9b088533141c3bb335eff2fe06b52d1575bb34f
|
|
|
d1681e |
>BUG: 1509189
|
|
|
d1681e |
>Reported-by: Daniel Gryniewicz <dang@redhat.com>
|
|
|
d1681e |
>Signed-off-by: Soumya Koduri <skoduri@redhat.com>
|
|
|
d1681e |
Upstream Link: https://review.gluster.org/#/c/18652/
|
|
|
d1681e |
|
|
|
d1681e |
BUG: 1568374
|
|
|
d1681e |
Change-Id: Ia9b088533141c3bb335eff2fe06b52d1575bb34f
|
|
|
d1681e |
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/135896
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
libglusterfs/src/timer.c | 15 ++++++++++++---
|
|
|
d1681e |
1 file changed, 12 insertions(+), 3 deletions(-)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
|
|
|
d1681e |
index 34dfd35..d6f008d 100644
|
|
|
d1681e |
--- a/libglusterfs/src/timer.c
|
|
|
d1681e |
+++ b/libglusterfs/src/timer.c
|
|
|
d1681e |
@@ -83,6 +83,13 @@ gf_timer_call_cancel (glusterfs_ctx_t *ctx,
|
|
|
d1681e |
return 0;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+ if (ctx->cleanup_started) {
|
|
|
d1681e |
+ gf_msg_callingfn ("timer", GF_LOG_INFO, 0,
|
|
|
d1681e |
+ LG_MSG_CTX_CLEANUP_STARTED,
|
|
|
d1681e |
+ "ctx cleanup started");
|
|
|
d1681e |
+ return 0;
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
LOCK (&ctx->lock);
|
|
|
d1681e |
{
|
|
|
d1681e |
reg = ctx->timer;
|
|
|
d1681e |
@@ -90,9 +97,11 @@ gf_timer_call_cancel (glusterfs_ctx_t *ctx,
|
|
|
d1681e |
UNLOCK (&ctx->lock);
|
|
|
d1681e |
|
|
|
d1681e |
if (!reg) {
|
|
|
d1681e |
- gf_msg ("timer", GF_LOG_ERROR, 0, LG_MSG_INIT_TIMER_FAILED,
|
|
|
d1681e |
- "!reg");
|
|
|
d1681e |
- GF_FREE (event);
|
|
|
d1681e |
+ /* This can happen when cleanup may have just started and
|
|
|
d1681e |
+ * gf_timer_registry_destroy() sets ctx->timer to NULL.
|
|
|
d1681e |
+ * Just bail out as success as gf_timer_proc() takes
|
|
|
d1681e |
+ * care of cleaning up the events.
|
|
|
d1681e |
+ */
|
|
|
d1681e |
return 0;
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|