Blob Blame History Raw
From c56086c701d08fc17cf6d8ef603caf505a4021b7 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Fri, 28 Jul 2017 16:32:58 +0200
Subject: [PATCH] main: Add support for libcgroup

When corosync is started in an environment where it ends up in a cgroup
without a properly set rt_runtime_us, it's impossible to get RT priority.

The already implemented workaround is to use a higher non-RT priority.

This patch implements another solution. It moves corosync into the root
cpu cgroup. The root cpu cgroup hopefully has enough RT budget.

Another solution was mentioned on the mailing list
https://lists.freedesktop.org/archives/systemd-devel/2017-July/039353.html
but it would mean generating some "random" values.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
 configure.ac     |   11 ++++++
 corosync.spec.in |    7 ++++
 exec/Makefile.am |    5 +++
 exec/main.c      |  105 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 man/corosync.8   |    6 +++-
 5 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 81fc91b..e60bf1b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -416,6 +416,10 @@ AC_ARG_ENABLE([qnetd],
 	[  --enable-qnetd                  : Quorum Net Daemon support ],,
 	[ enable_qnetd="no" ])
 AM_CONDITIONAL(BUILD_QNETD, test x$enable_qnetd = xyes)
+AC_ARG_ENABLE([libcgroup],
+	[  --enable-libcgroup                  : Enable libcgroup support ],,
+	[ enable_libcgroup="no" ])
+AM_CONDITIONAL(ENABLE_LIBCGROUP, test x$enable_libcgroup = xyes)
 
 # *FLAGS handling goes here
 
@@ -548,6 +552,13 @@ if test "x${enable_snmp}" = xyes; then
 fi
 AM_CONDITIONAL(BUILD_SNMP, test "${do_snmp}" = "1")
 
+if test "x${enable_libcgroup}" = xyes; then
+    PKG_CHECK_MODULES([libcgroup], [libcgroup])
+    AC_DEFINE_UNQUOTED([HAVE_LIBCGROUP], 1, [have libcgroup])
+    PACKAGE_FEATURES="$PACKAGE_FEATURES libcgroup"
+    WITH_LIST="$WITH_LIST --with libcgroup"
+fi
+
 # extra warnings
 EXTRA_WARNINGS=""
 
diff --git a/corosync.spec.in b/corosync.spec.in
index 97c8e03..49d7b7e 100644
--- a/corosync.spec.in
+++ b/corosync.spec.in
@@ -17,6 +17,7 @@
 %bcond_with runautogen
 %bcond_with qdevices
 %bcond_with qnetd
+%bcond_with libcgroup
 
 %global gitver %{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}
 %global gittarver %{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
@@ -74,6 +75,9 @@ Requires: nss-tools
 %if %{with qnetd}
 BuildRequires: sed
 %endif
+%if %{with libcgroup}
+BuildRequires: libcgroup-devel
+%endif
 
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
@@ -125,6 +129,9 @@ export rdmacm_LIBS=-lrdmacm \
 %if %{with qnetd}
 	--enable-qnetd \
 %endif
+%if %{with libcgroup}
+	--enable-libcgroup \
+%endif
 	--with-initddir=%{_initrddir} \
 	--with-systemddir=%{_unitdir} \
 	--with-upstartdir=%{_sysconfdir}/init \
diff --git a/exec/Makefile.am b/exec/Makefile.am
index 9ca7720..670daf6 100644
--- a/exec/Makefile.am
+++ b/exec/Makefile.am
@@ -78,5 +78,10 @@ corosync_LDADD		= libtotem_pg.la ../common_lib/libcorosync_common.la \
 
 corosync_DEPENDENCIES	= libtotem_pg.la ../common_lib/libcorosync_common.la
 
+if ENABLE_LIBCGROUP
+corosync_CFLAGS		+= $(libcgroup_CFLAGS)
+corosync_LDADD		+= $(libcgroup_LIBS)
+endif
+
 lint:
 	-splint $(LINT_FLAGS) $(CPPFLAGS) $(CFLAGS) *.c
diff --git a/exec/main.c b/exec/main.c
index 60c01a4..b3e7478 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -110,6 +110,10 @@
 #include <corosync/logsys.h>
 #include <corosync/icmap.h>
 
+#ifdef HAVE_LIBCGROUP
+#include <libcgroup.h>
+#endif
+
 #include "quorum.h"
 #include "totemsrp.h"
 #include "logconfig.h"
@@ -1134,12 +1138,95 @@ error_close:
 	return (err);
 }
 
+/*
+ * Move this process into the root cpu cgroup. Returns 0 when the process ends
+ * up in "/", -1 on failure (logged) or when built without HAVE_LIBCGROUP.
+ */
+static int corosync_move_to_root_cgroup(void) {
+	int res = -1;
+#ifdef HAVE_LIBCGROUP
+	int cg_ret;
+	struct cgroup *root_cgroup = NULL;
+	struct cgroup_controller *root_cpu_cgroup_controller = NULL;
+	char *current_cgroup_path = NULL;
+
+	cg_ret = cgroup_init();
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Unable to initialize libcgroup: %s ",
+		    cgroup_strerror(cg_ret));
+		goto exit_res;
+	}
+
+	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
+		    cgroup_strerror(cg_ret));
+		goto exit_res;
+	}
+
+	if (strcmp(current_cgroup_path, "/") == 0) {
+		log_printf(LOGSYS_LEVEL_DEBUG, "Corosync is already in root cgroup path");
+		res = 0;
+		goto exit_res;
+	}
+
+	root_cgroup = cgroup_new_cgroup("/");
+	if (root_cgroup == NULL) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup");
+		goto exit_res;
+	}
+
+	root_cpu_cgroup_controller = cgroup_add_controller(root_cgroup, "cpu");
+	if (root_cpu_cgroup_controller == NULL) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup cpu controller");
+		goto exit_res;
+	}
+
+	cg_ret = cgroup_attach_task(root_cgroup);
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't attach task to root cgroup: %s ",
+		    cgroup_strerror(cg_ret));
+		goto exit_res;
+	}
+
+	/* The path is caller-owned; free the old one, then re-query to verify the move */
+	free(current_cgroup_path);
+	current_cgroup_path = NULL;
+	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
+		    cgroup_strerror(cg_ret));
+		goto exit_res;
+	}
+
+	if (strcmp(current_cgroup_path, "/") == 0) {
+		log_printf(LOGSYS_LEVEL_NOTICE, "Corosync sucesfully moved to root cgroup");
+		res = 0;
+	} else {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't move Corosync to root cgroup");
+	}
+
+exit_res:
+	free(current_cgroup_path);
+	if (root_cgroup != NULL) {
+		cgroup_free(&root_cgroup);
+	}
+
+	/*
+	 * libcgroup doesn't define something like cgroup_fini so there is no way to clean
+	 * its cache. It has to be called when libcgroup authors decide to implement it.
+	 */
+#endif
+	return (res);
+}
+
+
 int main (int argc, char **argv, char **envp)
 {
 	const char *error_string;
 	struct totem_config totem_config;
 	int res, ch;
-	int background, sched_rr, prio, testonly;
+	int background, sched_rr, prio, testonly, move_to_root_cgroup;
 	struct stat stat_out;
 	enum e_corosync_done flock_err;
 	uint64_t totem_config_warnings;
@@ -1153,8 +1240,9 @@ int main (int argc, char **argv, char **envp)
 	sched_rr = 1;
 	prio = 0;
 	testonly = 0;
+	move_to_root_cgroup = 1;
 
-	while ((ch = getopt (argc, argv, "fP:prtv")) != EOF) {
+	while ((ch = getopt (argc, argv, "fP:pRrtv")) != EOF) {
 
 		switch (ch) {
 			case 'f':
@@ -1179,6 +1267,9 @@ int main (int argc, char **argv, char **envp)
 					prio = tmpli;
 				}
 				break;
+			case 'R':
+				move_to_root_cgroup = 0;
+				break;
 			case 'r':
 				sched_rr = 1;
 				break;
@@ -1198,6 +1289,7 @@ int main (int argc, char **argv, char **envp)
 					"        -f     : Start application in foreground.\n"\
 					"        -p     : Do not set realtime scheduling.\n"\
 					"        -r     : Set round robin realtime scheduling (default).\n"\
+					"        -R     : Do not try move corosync to root cpu cgroup (valid when built with libcgroup)\n" \
 					"        -P num : Set priority of process (no effect when -r is used)\n"\
 					"        -t     : Test configuration and exit.\n"\
 					"        -v     : Display version and SVN revision of Corosync and exit.\n");
@@ -1312,6 +1404,15 @@ int main (int argc, char **argv, char **envp)
 		corosync_exit_error (COROSYNC_DONE_EXIT);
 	}
 
+
+	/*
+	 * Try to move corosync into root cpu cgroup. Failure is not fatal and
+	 * error is deliberately ignored.
+	 */
+	if (move_to_root_cgroup) {
+		(void)corosync_move_to_root_cgroup();
+	}
+
 	/*
 	 * Set round robin realtime scheduling with priority 99
 	 */
diff --git a/man/corosync.8 b/man/corosync.8
index dc596d1..7bce65e 100644
--- a/man/corosync.8
+++ b/man/corosync.8
@@ -35,7 +35,7 @@
 .SH NAME
 corosync \- The Corosync Cluster Engine.
 .SH SYNOPSIS
-.B "corosync [\-f] [\-P num] [\-p] [\-r] [\-t] [\-v]"
+.B "corosync [\-f] [\-P num] [\-p] [\-r] [\-R] [\-t] [\-v]"
 .SH DESCRIPTION
 .B corosync
 Corosync provides clustering infrastructure such as membership, messaging and quorum.
@@ -62,6 +62,10 @@ meaning maximal / minimal priority (so minimal / maximal nice value).
 Set round robin realtime scheduling with maximal priority (default). When setting
 of scheduler fails, fallback to set maximal priority.
 .TP
+.B -R
+Do not try to move Corosync to root cpu cgroup. This feature is available only
+for corosync with libcgroup enabled during the build.
+.TP
 .B -t
 Test configuration and then exit.
 .TP
-- 
1.7.1