|
|
60de42 |
From 07c645f1d568b51dc5ae4566dcb7456daba6934c Mon Sep 17 00:00:00 2001
|
|
|
60de42 |
From: "Gao,Yan" <ygao@suse.com>
|
|
|
60de42 |
Date: Wed, 14 Dec 2016 16:08:38 +0100
|
|
|
60de42 |
Subject: [PATCH] Fix: dbus: Prevent lrmd from hanging on dbus calls
|
|
|
60de42 |
|
|
|
60de42 |
With "service" class resources, lrmd occasionally hangs in the futex()
|
|
|
60de42 |
syscall:
|
|
|
60de42 |
|
|
|
60de42 |
root@node2:~ # cat /proc/2503/stack
|
|
|
60de42 |
[<ffffffff810fa0c0>] futex_wait_queue_me+0xc0/0x130
|
|
|
60de42 |
[<ffffffff810faf23>] futex_wait+0x163/0x250
|
|
|
60de42 |
[<ffffffff810fc870>] do_futex+0xe0/0x540
|
|
|
60de42 |
[<ffffffff810fcd3e>] SyS_futex+0x6e/0x140
|
|
|
60de42 |
[<ffffffff815e142e>] entry_SYSCALL_64_fastpath+0x12/0x6d
|
|
|
60de42 |
[<ffffffffffffffff>] 0xffffffffffffffff
|
|
|
60de42 |
|
|
|
60de42 |
The cluster then stops working properly and cannot recover from the situation.
|
|
|
60de42 |
|
|
|
60de42 |
According to the backtrace, this appears to be caused by re-entering
|
|
|
60de42 |
dbus_connection_dispatch():
|
|
|
60de42 |
|
|
|
60de42 |
(gdb) bt
|
|
|
60de42 |
#0 0x00007f07f7d2e0af in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
|
|
|
60de42 |
#1 0x00007f07f6c29925 in _dbus_connection_acquire_dispatch (connection=0x13411f0) at dbus-connection.c:4142
|
|
|
60de42 |
#2 0x00007f07f6c2b3bc in dbus_connection_dispatch (connection=connection@entry=0x13411f0) at dbus-connection.c:4577
|
|
|
60de42 |
#3 0x00007f07f8d88e50 in pcmk_dbus_connection_dispatch (connection=connection@entry=0x13411f0, new_status=new_status@entry=DBUS_DISPATCH_DATA_REMAINS, data=data@entry=0x0) at dbus.c:410
|
|
|
60de42 |
#4 0x00007f07f6c29b70 in _dbus_connection_update_dispatch_status_and_unlock (connection=0x13411f0, new_status=DBUS_DISPATCH_DATA_REMAINS) at dbus-connection.c:4346
|
|
|
60de42 |
#5 0x00007f07f6c29f79 in check_for_reply_and_update_dispatch_unlocked (connection=connection@entry=0x13411f0, pending=pending@entry=0x135a8b0) at dbus-connection.c:2355
|
|
|
60de42 |
#6 0x00007f07f6c2a08b in _dbus_connection_block_pending_call (pending=0x135a8b0) at dbus-connection.c:2461
|
|
|
60de42 |
#7 0x00007f07f6c396ba in dbus_pending_call_block (pending=<optimized out>) at dbus-pending-call.c:741
|
|
|
60de42 |
#8 0x00007f07f8d8929c in pcmk_dbus_send_recv (msg=msg@entry=0x1340940, connection=0x13411f0, error=error@entry=0x7ffc5d148fc0, timeout=-1) at dbus.c:141
|
|
|
60de42 |
#9 0x00007f07f8d8d2d7 in systemd_unit_by_name (arg_name=arg_name@entry=0x133dcb0 "service", op=op@entry=0x0) at systemd.c:296
|
|
|
60de42 |
#10 0x00007f07f8d8d45b in systemd_unit_exists (name=name@entry=0x133dcb0 "service") at systemd.c:416
|
|
|
60de42 |
#11 0x00007f07f8d83dc5 in resources_find_service_class (agent=0x133dcb0 "service") at services.c:88
|
|
|
60de42 |
#12 0x0000000000405b05 in action_complete (action=0x134e0b0) at lrmd.c:876
|
|
|
60de42 |
#13 0x00007f07f8d867e3 in operation_finalize (op=0x134e0b0) at services_linux.c:257
|
|
|
60de42 |
#14 0x00007f07f8d899d8 in pcmk_dbus_lookup_result (reply=reply@entry=0x135cc80, data=data@entry=0x1355e30) at dbus.c:289
|
|
|
60de42 |
#15 0x00007f07f8d89ba4 in pcmk_dbus_lookup_cb (pending=<optimized out>, user_data=0x1355e30) at dbus.c:334
|
|
|
60de42 |
#16 0x00007f07f6c28032 in complete_pending_call_and_unlock (connection=0x13411f0, pending=0x135a2c0, message=<optimized out>) at dbus-connection.c:2331
|
|
|
60de42 |
#17 0x00007f07f6c2b401 in dbus_connection_dispatch (connection=connection@entry=0x13411f0) at dbus-connection.c:4626
|
|
|
60de42 |
#18 0x00007f07f8d88e50 in pcmk_dbus_connection_dispatch (connection=connection@entry=0x13411f0, new_status=new_status@entry=DBUS_DISPATCH_DATA_REMAINS, data=data@entry=0x0) at dbus.c:410
|
|
|
60de42 |
#19 0x00007f07f6c29b70 in _dbus_connection_update_dispatch_status_and_unlock (connection=0x13411f0, new_status=DBUS_DISPATCH_DATA_REMAINS) at dbus-connection.c:4346
|
|
|
60de42 |
#20 0x00007f07f6c29ca6 in _dbus_connection_handle_watch (watch=<optimized out>, condition=1, data=0x13411f0) at dbus-connection.c:1520
|
|
|
60de42 |
#21 0x00007f07f6c40f2a in dbus_watch_handle (watch=watch@entry=0x133d6a0, flags=flags@entry=1) at dbus-watch.c:722
|
|
|
60de42 |
#22 0x00007f07f8d887da in pcmk_dbus_watch_dispatch (userdata=0x133d6a0) at dbus.c:448
|
|
|
60de42 |
#23 0x00007f07f8fcfef7 in mainloop_gio_callback (gio=<optimized out>, condition=G_IO_IN, data=0x133f210) at mainloop.c:673
|
|
|
60de42 |
#24 0x00007f07f82a0015 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0
|
|
|
60de42 |
#25 0x00007f07f82a0388 in ?? () from /usr/lib64/libglib-2.0.so.0
|
|
|
60de42 |
#26 0x00007f07f82a064a in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0
|
|
|
60de42 |
#27 0x0000000000402c0e in main (argc=<optimized out>, argv=0x7ffc5d149818) at main.c:476
|
|
|
60de42 |
|
|
|
60de42 |
As described in:
|
|
|
60de42 |
https://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html#ga55ff88cd22c0672441c7deffbfb68fbf
|
|
|
60de42 |
|
|
|
60de42 |
dbus_connection_dispatch() MUST NOT BE CALLED from inside the
|
|
|
60de42 |
DBusDispatchStatusFunction. It seems that pcmk_dbus_watch_dispatch() is
|
|
|
60de42 |
an appropriate place to do it instead.
|
|
|
60de42 |
---
|
|
|
60de42 |
lib/services/dbus.c | 30 +++++++++++++++++++++++++-----
|
|
|
60de42 |
1 file changed, 25 insertions(+), 5 deletions(-)
|
|
|
60de42 |
|
|
|
60de42 |
diff --git a/lib/services/dbus.c b/lib/services/dbus.c
|
|
|
60de42 |
index 4ce7dfe..0748c86 100644
|
|
|
60de42 |
--- a/lib/services/dbus.c
|
|
|
60de42 |
+++ b/lib/services/dbus.c
|
|
|
60de42 |
@@ -13,6 +13,8 @@
|
|
|
60de42 |
|
|
|
60de42 |
#define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"
|
|
|
60de42 |
|
|
|
60de42 |
+static GList *conn_dispatches = NULL;
|
|
|
60de42 |
+
|
|
|
60de42 |
struct db_getall_data {
|
|
|
60de42 |
char *name;
|
|
|
60de42 |
char *target;
|
|
|
60de42 |
@@ -445,17 +447,31 @@ pcmk_dbus_get_property(DBusConnection *connection, const char *target,
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
static void
|
|
|
60de42 |
-pcmk_dbus_connection_dispatch(DBusConnection *connection,
|
|
|
60de42 |
+pcmk_dbus_connection_dispatch_status(DBusConnection *connection,
|
|
|
60de42 |
DBusDispatchStatus new_status, void *data)
|
|
|
60de42 |
{
|
|
|
60de42 |
- crm_trace("status %d for %p", new_status, data);
|
|
|
60de42 |
+ crm_trace("New status %d for connection %p", new_status, connection);
|
|
|
60de42 |
if (new_status == DBUS_DISPATCH_DATA_REMAINS){
|
|
|
60de42 |
- dbus_connection_dispatch(connection);
|
|
|
60de42 |
+ conn_dispatches = g_list_prepend(conn_dispatches, connection);
|
|
|
60de42 |
+ }
|
|
|
60de42 |
+}
|
|
|
60de42 |
+
|
|
|
60de42 |
+static void
|
|
|
60de42 |
+pcmk_dbus_connections_dispatch()
|
|
|
60de42 |
+{
|
|
|
60de42 |
+ GList *gIter = NULL;
|
|
|
60de42 |
+
|
|
|
60de42 |
+ for (gIter = conn_dispatches; gIter != NULL; gIter = gIter->next) {
|
|
|
60de42 |
+ DBusConnection *connection = gIter->data;
|
|
|
60de42 |
|
|
|
60de42 |
while (dbus_connection_get_dispatch_status(connection) == DBUS_DISPATCH_DATA_REMAINS) {
|
|
|
60de42 |
+ crm_trace("Dispatching for connection %p", connection);
|
|
|
60de42 |
dbus_connection_dispatch(connection);
|
|
|
60de42 |
}
|
|
|
60de42 |
}
|
|
|
60de42 |
+
|
|
|
60de42 |
+ g_list_free(conn_dispatches);
|
|
|
60de42 |
+ conn_dispatches = NULL;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
/* Copied from dbus-watch.c */
|
|
|
60de42 |
@@ -506,7 +522,11 @@ pcmk_dbus_watch_dispatch(gpointer userdata)
|
|
|
60de42 |
if(oom) {
|
|
|
60de42 |
crm_err("DBus encountered OOM while attempting to dispatch %p (%s)",
|
|
|
60de42 |
client, dbus_watch_flags_to_string(flags));
|
|
|
60de42 |
+
|
|
|
60de42 |
+ } else {
|
|
|
60de42 |
+ pcmk_dbus_connections_dispatch();
|
|
|
60de42 |
}
|
|
|
60de42 |
+
|
|
|
60de42 |
return 0;
|
|
|
60de42 |
}
|
|
|
60de42 |
|
|
|
60de42 |
@@ -616,6 +636,6 @@ pcmk_dbus_connection_setup_with_select(DBusConnection *c)
|
|
|
60de42 |
dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add,
|
|
|
60de42 |
pcmk_dbus_watch_remove,
|
|
|
60de42 |
pcmk_dbus_watch_toggle, NULL, NULL);
|
|
|
60de42 |
- dbus_connection_set_dispatch_status_function(c, pcmk_dbus_connection_dispatch, NULL, NULL);
|
|
|
60de42 |
- pcmk_dbus_connection_dispatch(c, dbus_connection_get_dispatch_status(c), NULL);
|
|
|
60de42 |
+ dbus_connection_set_dispatch_status_function(c, pcmk_dbus_connection_dispatch_status, NULL, NULL);
|
|
|
60de42 |
+ pcmk_dbus_connection_dispatch_status(c, dbus_connection_get_dispatch_status(c), NULL);
|
|
|
60de42 |
}
|
|
|
60de42 |
--
|
|
|
60de42 |
1.8.3.1
|
|
|
60de42 |
|