a3470f
From 4623ebe9a69a5284565b4ad253084a1236478988 Mon Sep 17 00:00:00 2001
a3470f
From: Soumya Koduri <skoduri@redhat.com>
a3470f
Date: Fri, 3 Nov 2017 15:41:34 +0530
a3470f
Subject: [PATCH 243/260] timer: Fix possible race during cleanup
a3470f
a3470f
As mentioned in bug1509189, there is a possible race
a3470f
between gf_timer_cancel(), gf_timer_proc() and
a3470f
gf_timer_registry_destroy() leading to use_after_free.
a3470f
a3470f
Problem:
a3470f
a3470f
1) gf_timer_proc() is called, locks reg, and gets an event.
a3470f
It unlocks reg, and calls the callback.
a3470f
a3470f
2) Meanwhile gf_timer_registry_destroy() is called, and removes
a3470f
reg from ctx, and joins on gf_timer_proc().
a3470f
a3470f
3) gf_timer_call_cancel() is called on the event being
a3470f
processed.  It cannot find reg (since it's been removed from reg),
a3470f
so it frees event.
a3470f
a3470f
4) the callback returns into gf_timer_proc(), and it tries to free
a3470f
event, but it's already free, so double free.
a3470f
a3470f
Solution:
a3470f
The fix is to bail out in gf_timer_cancel() when registry
a3470f
is not found. The logic behind this is that, gf_timer_cancel()
a3470f
is called only on any existing event. That means there was a valid
a3470f
registry earlier while creating that event. And the only reason
a3470f
we cannot find that registry now is that it must have got set to
a3470f
NULL when context cleanup is started.
a3470f
Since gf_timer_proc() takes care of releasing all the remaining
a3470f
events active on that registry, it seems safe to bail out
a3470f
in gf_timer_cancel().
a3470f
a3470f
>Change-Id: Ia9b088533141c3bb335eff2fe06b52d1575bb34f
a3470f
>BUG: 1509189
a3470f
>Reported-by: Daniel Gryniewicz <dang@redhat.com>
a3470f
>Signed-off-by: Soumya Koduri <skoduri@redhat.com>
a3470f
Upstream Link: https://review.gluster.org/#/c/18652/
a3470f
a3470f
BUG: 1568374
a3470f
Change-Id: Ia9b088533141c3bb335eff2fe06b52d1575bb34f
a3470f
Signed-off-by: Sunil Kumar Acharya <sheggodu@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/135896
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
---
a3470f
 libglusterfs/src/timer.c | 15 ++++++++++++---
a3470f
 1 file changed, 12 insertions(+), 3 deletions(-)
a3470f
a3470f
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
a3470f
index 34dfd35..d6f008d 100644
a3470f
--- a/libglusterfs/src/timer.c
a3470f
+++ b/libglusterfs/src/timer.c
a3470f
@@ -83,6 +83,13 @@ gf_timer_call_cancel (glusterfs_ctx_t *ctx,
a3470f
                 return 0;
a3470f
         }
a3470f
 
a3470f
+        if (ctx->cleanup_started) {
a3470f
+                gf_msg_callingfn ("timer", GF_LOG_INFO, 0,
a3470f
+                                  LG_MSG_CTX_CLEANUP_STARTED,
a3470f
+                                  "ctx cleanup started");
a3470f
+                return 0;
a3470f
+        }
a3470f
+
a3470f
         LOCK (&ctx->lock);
a3470f
         {
a3470f
                 reg = ctx->timer;
a3470f
@@ -90,9 +97,11 @@ gf_timer_call_cancel (glusterfs_ctx_t *ctx,
a3470f
         UNLOCK (&ctx->lock);
a3470f
 
a3470f
         if (!reg) {
a3470f
-                gf_msg ("timer", GF_LOG_ERROR, 0, LG_MSG_INIT_TIMER_FAILED,
a3470f
-                        "!reg");
a3470f
-                GF_FREE (event);
a3470f
+                /* This can happen when cleanup may have just started and
a3470f
+                 * gf_timer_registry_destroy() sets ctx->timer to NULL.
a3470f
+                 * Just bail out as success as gf_timer_proc() takes
a3470f
+                 * care of cleaning up the events.
a3470f
+                 */
a3470f
                 return 0;
a3470f
         }
a3470f
 
a3470f
-- 
a3470f
1.8.3.1
a3470f