Blob Blame History Raw
From 47f957c747f76d54b7289d9651aa057efb5be4c7 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 27 Feb 2017 14:10:02 -0600
Subject: [PATCH 1/7] Fix: libcrmcommon: assert if can't generate operation key

This is a refactor of generate_op_key() to use the existing
crm_strdup_printf() function instead of its own allocation,
and use CRM_ASSERT() instead of CRM_CHECK() for argument validation.

This simplifies the code, avoids allocating more memory than needed,
and results in assertion failures instead of returning NULL on errors,
which is preferable because existing callers don't check the
return value for NULL before using it.
---
 lib/common/utils.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/lib/common/utils.c b/lib/common/utils.c
index 3e3abd3..dbf84c0 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -629,21 +629,24 @@ crm_get_msec(const char *input)
     return msec;
 }
 
+/*!
+ * \brief Generate an operation key
+ *
+ * \param[in] rsc_id    ID of resource being operated on
+ * \param[in] op_type   Operation name
+ * \param[in] interval  Operation interval
+ *
+ * \return Newly allocated memory containing operation key as string
+ *
+ * \note It is the caller's responsibility to free() the result.
+ */
 char *
 generate_op_key(const char *rsc_id, const char *op_type, int interval)
 {
-    int len = 35;
-    char *op_id = NULL;
-
-    CRM_CHECK(rsc_id != NULL, return NULL);
-    CRM_CHECK(op_type != NULL, return NULL);
-
-    len += strlen(op_type);
-    len += strlen(rsc_id);
-    op_id = malloc(len);
-    CRM_CHECK(op_id != NULL, return NULL);
-    sprintf(op_id, "%s_%s_%d", rsc_id, op_type, interval);
-    return op_id;
+    CRM_ASSERT(rsc_id != NULL);
+    CRM_ASSERT(op_type != NULL);
+    CRM_ASSERT(interval >= 0);
+    return crm_strdup_printf("%s_%s_%d", rsc_id, op_type, interval);
 }
 
 gboolean
-- 
1.8.3.1


From 94a4c58f675d163085a055f59fd6c3a2c9f57c43 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 27 Feb 2017 14:17:01 -0600
Subject: [PATCH 2/7] Fix: libservices: ensure completed ops aren't on blocked
 ops list

This refactors removal of a completed operation from the in-flight ops list
into a new internal function services_untrack_op(), which allows the
in-flight ops list and handle_blocked_ops() to be static to services.c.

The new function also ensures the op isn't on the blocked ops list,
which could otherwise result in a use-after-free bug.
---
 lib/services/services.c         | 23 +++++++++++++++++++++--
 lib/services/services_linux.c   |  7 +------
 lib/services/services_private.h |  2 +-
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/lib/services/services.c b/lib/services/services.c
index 0b535e6..90a2181 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -44,7 +44,9 @@ static GHashTable *recurring_actions = NULL;
 static GList *blocked_ops = NULL;
 
 /* ops currently active (in-flight) */
-GList *inflight_ops = NULL;
+static GList *inflight_ops = NULL;
+
+static void handle_blocked_ops(void);
 
 svc_action_t *
 services_action_create(const char *name, const char *action, int interval, int timeout)
@@ -605,6 +607,23 @@ services_add_inflight_op(svc_action_t * op)
     }
 }
 
+/*!
+ * \internal
+ * \brief Stop tracking an operation that completed
+ *
+ * \param[in] op  Operation to stop tracking
+ */
+void
+services_untrack_op(svc_action_t *op)
+{
+    /* Op is no longer in-flight or blocked */
+    inflight_ops = g_list_remove(inflight_ops, op);
+    blocked_ops = g_list_remove(blocked_ops, op);
+
+    /* Op is no longer blocking other ops, so check if any need to run */
+    handle_blocked_ops();
+}
+
 gboolean
 services_action_async(svc_action_t * op, void (*action_callback) (svc_action_t *))
 {
@@ -649,7 +668,7 @@ is_op_blocked(const char *rsc)
     return FALSE;
 }
 
-void
+static void
 handle_blocked_ops(void)
 {
     GList *executed_ops = NULL;
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index adfefaa..1fe3ec1 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -36,9 +36,6 @@
 #  include "crm/common/cib_secrets.h"
 #endif
 
-/* ops currently active (in-flight) */
-extern GList *inflight_ops;
-
 static inline void
 set_fd_opts(int fd, int opts)
 {
@@ -248,9 +245,7 @@ operation_finalize(svc_action_t * op)
 
     op->pid = 0;
 
-    inflight_ops = g_list_remove(inflight_ops, op);
-
-    handle_blocked_ops();
+    services_untrack_op(op);
 
     if (!recurring && op->synchronous == FALSE) {
         /*
diff --git a/lib/services/services_private.h b/lib/services/services_private.h
index ec9e1c4..a949592 100644
--- a/lib/services/services_private.h
+++ b/lib/services/services_private.h
@@ -76,7 +76,7 @@ G_GNUC_INTERNAL
 void services_add_inflight_op(svc_action_t *op);
 
 G_GNUC_INTERNAL
-void handle_blocked_ops(void);
+void services_untrack_op(svc_action_t *op);
 
 G_GNUC_INTERNAL
 gboolean is_op_blocked(const char *rsc);
-- 
1.8.3.1


From b938c47740a456d0057438e4f5e6029d0e084587 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 27 Feb 2017 14:20:05 -0600
Subject: [PATCH 3/7] Refactor: libservices: dynamically allocate operation key

reduces code duplication, and avoids memory over-allocation
---
 lib/services/services.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/lib/services/services.c b/lib/services/services.c
index 90a2181..4020b7d 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -149,10 +149,7 @@ resources_action_create(const char *name, const char *standard, const char *prov
     op->agent = strdup(agent);
     op->sequence = ++operations;
     op->flags = flags;
-
-    if (asprintf(&op->id, "%s_%s_%d", name, action, interval) == -1) {
-        goto return_error;
-    }
+    op->id = generate_op_key(name, action, interval);
 
     if (strcasecmp(op->standard, "service") == 0) {
         const char *expanded = resources_find_service_class(op->agent);
@@ -467,11 +464,10 @@ gboolean
 services_action_cancel(const char *name, const char *action, int interval)
 {
     svc_action_t *op = NULL;
-    char id[512];
-
-    snprintf(id, sizeof(id), "%s_%s_%d", name, action, interval);
+    char *id = generate_op_key(name, action, interval);
 
     if (!(op = g_hash_table_lookup(recurring_actions, id))) {
+        free(id);
         return FALSE;
     }
 
@@ -494,10 +490,12 @@ services_action_cancel(const char *name, const char *action, int interval)
             /* even though the early termination failed,
              * the op will be marked as cancelled once it completes. */
             crm_err("Termination of %s (pid=%d) failed", id, op->pid);
+            free(id);
             return FALSE;
         }
     }
 
+    free(id);
     return TRUE;
 }
 
@@ -505,11 +503,7 @@ gboolean
 services_action_kick(const char *name, const char *action, int interval /* ms */)
 {
     svc_action_t * op = NULL;
-    char *id = NULL;
-
-    if (asprintf(&id, "%s_%s_%d", name, action, interval) == -1) {
-        return FALSE;
-    }
+    char *id = generate_op_key(name, action, interval);
 
     op = g_hash_table_lookup(recurring_actions, id);
     free(id);
-- 
1.8.3.1


From 933d46ef20591757301784773a37e06b78906584 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 27 Feb 2017 14:28:36 -0600
Subject: [PATCH 4/7] Fix: libservices: prevent use-after-free when freeing an
 operation

---
 lib/services/services.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/services/services.c b/lib/services/services.c
index 4020b7d..78a0781 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -410,6 +410,16 @@ services_action_free(svc_action_t * op)
         return;
     }
 
+    /* The operation should be removed from all tracking lists by this point.
+     * If it's not, we have a bug somewhere, so bail. That may lead to a
+     * memory leak, but it's better than a use-after-free segmentation fault.
+     */
+    CRM_CHECK(g_list_find(inflight_ops, op) == NULL, return);
+    CRM_CHECK(g_list_find(blocked_ops, op) == NULL, return);
+    CRM_CHECK((recurring_actions == NULL)
+              || (g_hash_table_lookup(recurring_actions, op->id) == NULL),
+              return);
+
     services_action_cleanup(op);
 
     if (op->opaque->repeat_timer) {
-- 
1.8.3.1


From dc36d4375c049024a6f9e4d2277a3e6444fad05b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 28 Feb 2017 15:41:10 -0600
Subject: [PATCH 5/7] Refactor: libservices: handle in-flight case first when
 cancelling an operation

will make next bugfix easier
---
 lib/services/services.c | 65 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/lib/services/services.c b/lib/services/services.c
index 78a0781..dba4e66 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -470,43 +470,62 @@ cancel_recurring_action(svc_action_t * op)
     return TRUE;
 }
 
+/*!
+ * \brief Cancel a recurring action
+ *
+ * \param[in] name      Name of resource that operation is for
+ * \param[in] action    Name of operation to cancel
+ * \param[in] interval  Interval of operation to cancel
+ *
+ * \return TRUE if action was successfully cancelled, FALSE otherwise
+ */
 gboolean
 services_action_cancel(const char *name, const char *action, int interval)
 {
-    svc_action_t *op = NULL;
+    gboolean cancelled = FALSE;
     char *id = generate_op_key(name, action, interval);
+    svc_action_t *op = g_hash_table_lookup(recurring_actions, id);
 
-    if (!(op = g_hash_table_lookup(recurring_actions, id))) {
-        free(id);
-        return FALSE;
+    /* We can only cancel a recurring action */
+    if (op == NULL) {
+        goto done;
     }
 
-    /* Always kill the recurring timer */
+    /* Tell operation_finalize() not to reschedule the operation */
+    op->cancel = TRUE;
+
+    /* Stop tracking it as a recurring operation, and stop its timer */
     cancel_recurring_action(op);
 
-    if (op->pid == 0) {
-        op->status = PCMK_LRM_OP_CANCELLED;
-        if (op->opaque->callback) {
-            op->opaque->callback(op);
+    /* If the op has a PID, it's an in-flight child process, so kill it.
+     *
+     * Whether the kill succeeds or fails, the main loop will send the op to
+     * operation_finished() (and thus operation_finalize()) when the process
+     * goes away.
+     */
+    if (op->pid != 0) {
+        crm_info("Terminating in-flight op %s (pid %d) early because it was cancelled",
+                 id, op->pid);
+        cancelled = mainloop_child_kill(op->pid);
+        if (cancelled == FALSE) {
+            crm_err("Termination of %s (pid %d) failed", id, op->pid);
         }
+        goto done;
+    }
 
-        blocked_ops = g_list_remove(blocked_ops, op);
-        services_action_free(op);
-
-    } else {
-        crm_info("Cancelling in-flight op: performing early termination of %s (pid=%d)", id, op->pid);
-        op->cancel = 1;
-        if (mainloop_child_kill(op->pid) == FALSE) {
-            /* even though the early termination failed,
-             * the op will be marked as cancelled once it completes. */
-            crm_err("Termination of %s (pid=%d) failed", id, op->pid);
-            free(id);
-            return FALSE;
-        }
+    /* Otherwise, operation is not in-flight, just report as cancelled */
+    op->status = PCMK_LRM_OP_CANCELLED;
+    if (op->opaque->callback) {
+        op->opaque->callback(op);
     }
 
+    blocked_ops = g_list_remove(blocked_ops, op);
+    services_action_free(op);
+    cancelled = TRUE;
+
+done:
     free(id);
-    return TRUE;
+    return cancelled;
 }
 
 gboolean
-- 
1.8.3.1


From deabcc5a6aa93dadf0b20364715b559a5b9848ac Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 28 Feb 2017 15:48:11 -0600
Subject: [PATCH 6/7] Fix: libservices: properly cancel in-flight
 systemd/upstart op

---
 lib/services/services.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/lib/services/services.c b/lib/services/services.c
index dba4e66..4aa4c01 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -89,6 +89,21 @@ resources_find_service_class(const char *agent)
     return NULL;
 }
 
+/*!
+ * \internal
+ * \brief Check whether op is in-flight systemd or upstart op
+ *
+ * \param[in] op  Operation to check
+ *
+ * \return TRUE if op is in-flight systemd or upstart op
+ */
+static inline gboolean
+inflight_systemd_or_upstart(svc_action_t *op)
+{
+    return (safe_str_eq(op->standard, "systemd")
+            || safe_str_eq(op->standard, "upstart"))
+            && (g_list_find(inflight_ops, op) != NULL);
+}
 
 svc_action_t *
 resources_action_create(const char *name, const char *standard, const char *provider,
@@ -513,6 +528,18 @@ services_action_cancel(const char *name, const char *action, int interval)
         goto done;
     }
 
+    /* In-flight systemd and upstart ops don't have a pid. The relevant handlers
+     * will call operation_finalize() when the operation completes.
+     * @TODO: Can we request early termination, maybe using
+     * dbus_pending_call_cancel()?
+     */
+    if (inflight_systemd_or_upstart(op)) {
+        crm_info("Will cancel %s op %s when in-flight instance completes",
+                 op->standard, op->id);
+        cancelled = FALSE;
+        goto done;
+    }
+
     /* Otherwise, operation is not in-flight, just report as cancelled */
     op->status = PCMK_LRM_OP_CANCELLED;
     if (op->opaque->callback) {
-- 
1.8.3.1


From b85037b75255061a41d0ec3fd9b64f271351b43e Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 28 Feb 2017 16:43:52 -0600
Subject: [PATCH 7/7] Fix: libservices: properly detect in-flight
 systemd/upstart ops when kicking

---
 lib/services/services.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/services/services.c b/lib/services/services.c
index 4aa4c01..b41517d 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -568,7 +568,8 @@ services_action_kick(const char *name, const char *action, int interval /* ms */
         return FALSE;
     }
 
-    if (op->pid) {
+
+    if (op->pid || inflight_systemd_or_upstart(op)) {
         return TRUE;
     } else {
         if (op->opaque->repeat_timer) {
-- 
1.8.3.1