Blob Blame History Raw
From d6a370721e1eb2bab9b5a7d66c6e4e935c69f30b Mon Sep 17 00:00:00 2001
Message-Id: <d6a370721e1eb2bab9b5a7d66c6e4e935c69f30b.1375465853.git.jdenemar@redhat.com>
From: "Daniel P. Berrange" <berrange@redhat.com>
Date: Wed, 31 Jul 2013 19:48:18 +0100
Subject: [PATCH] Add support for systemd cgroup mount

https://bugzilla.redhat.com/show_bug.cgi?id=980929

Systemd uses a named cgroup mount for tracking processes. Add
it as another type of controller, albeit one which we have to
special case in a number of places. In particular we must
never create/delete directories there, nor add tasks. Essentially
the systemd mount is to be considered read-only for libvirt.

With this change both the virCgroupDetectPlacement and
virCgroupCopyPlacement methods must be invoked. The copy
placement method will copy setup for resource controllers
only. The detect placement method will probe for any
named controllers, or resource controllers not already
setup.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>

(cherry picked from commit aedd46e7e32956b4f360ba631b47064c2bafcbff)
---
 src/util/vircgroup.c  | 68 +++++++++++++++++++++++++++++++++++++++------------
 src/util/vircgroup.h  |  1 +
 tests/vircgrouptest.c |  9 +++++++
 3 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index f9007e3..0e8bb79 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -57,7 +57,8 @@
 
 VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
               "cpu", "cpuacct", "cpuset", "memory", "devices",
-              "freezer", "blkio", "net_cls", "perf_event");
+              "freezer", "blkio", "net_cls", "perf_event",
+              "name=systemd");
 
 typedef enum {
     VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
@@ -117,6 +118,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group,
     for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
         char *tmp;
 
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+            continue;
+
         if (!group->controllers[i].placement)
             continue;
 
@@ -331,6 +335,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group,
         if (!group->controllers[i].mountPoint)
             continue;
 
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+            continue;
+
         if (path[0] == '/') {
             if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
                 return -1;
@@ -386,6 +393,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
     int ret = -1;
     char *procfile;
 
+    VIR_DEBUG("Detecting placement for pid %lld path %s",
+              (unsigned long long)pid, path);
     if (pid == -1) {
         if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
             goto cleanup;
@@ -422,6 +431,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
             const char *typestr = virCgroupControllerTypeToString(i);
             int typelen = strlen(typestr);
             char *tmp = controllers;
+
             while (tmp) {
                 char *next = strchr(tmp, ',');
                 int len;
@@ -438,13 +448,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
                  * selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo"
                  */
                 if (typelen == len && STREQLEN(typestr, tmp, len) &&
-                    group->controllers[i].mountPoint != NULL) {
-                    if (virAsprintf(&group->controllers[i].placement,
-                                    "%s%s%s", selfpath,
-                                    (STREQ(selfpath, "/") ||
-                                     STREQ(path, "") ? "" : "/"),
-                                    path) < 0)
-                        goto cleanup;
+                    group->controllers[i].mountPoint != NULL &&
+                    group->controllers[i].placement == NULL) {
+                    if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
+                        if (VIR_STRDUP(group->controllers[i].placement,
+                                       selfpath) < 0)
+                            goto cleanup;
+                    } else {
+                        if (virAsprintf(&group->controllers[i].placement,
+                                        "%s%s%s", selfpath,
+                                        (STREQ(selfpath, "/") ||
+                                         STREQ(path, "") ? "" : "/"),
+                                        path) < 0)
+                            goto cleanup;
+                    }
                 }
 
                 tmp = next;
@@ -535,13 +552,16 @@ static int virCgroupDetect(virCgroupPtr group,
         return -1;
     }
 
-    if (parent || path[0] == '/') {
-        if (virCgroupCopyPlacement(group, path, parent) < 0)
-            return -1;
-    } else {
-        if (virCgroupDetectPlacement(group, pid, path) < 0)
-            return -1;
-    }
+    /* In some cases we can copy part of the placement info
+     * based on the parent cgroup...
+     */
+    if ((parent || path[0] == '/') &&
+        virCgroupCopyPlacement(group, path, parent) < 0)
+        return -1;
+
+    /* ... but use /proc/cgroups to fill in the rest */
+    if (virCgroupDetectPlacement(group, pid, path) < 0)
+        return -1;
 
     /* Check that for every mounted controller, we found our placement */
     for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
@@ -833,6 +853,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
     for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
         char *path = NULL;
 
+        /* We must never mkdir() in systemd's hierarchy */
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
+            VIR_DEBUG("Not creating systemd controller group");
+            continue;
+        }
+
         /* Skip over controllers that aren't mounted */
         if (!group->controllers[i].mountPoint) {
             VIR_DEBUG("Skipping unmounted controller %s",
@@ -1037,6 +1063,10 @@ int virCgroupRemove(virCgroupPtr group)
         if (!group->controllers[i].mountPoint)
             continue;
 
+        /* We must never rmdir() in systemd's hierarchy */
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+            continue;
+
         /* Don't delete the root group, if we accidentally
            ended up in it for some reason */
         if (STREQ(group->controllers[i].placement, "/"))
@@ -1076,6 +1106,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
         if (!group->controllers[i].mountPoint)
             continue;
 
+        /* We must never add tasks in systemd's hierarchy */
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+            continue;
+
         if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0)
             goto cleanup;
     }
@@ -1177,6 +1211,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
             !dest_group->controllers[i].mountPoint)
             continue;
 
+        /* We must never move tasks in systemd's hierarchy */
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+            continue;
+
         /* New threads are created in the same group as their parent;
          * but if a thread is created after we first read we aren't
          * aware that it needs to move.  Therefore, we must iterate
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
index 3aaf081..e579f41 100644
--- a/src/util/vircgroup.h
+++ b/src/util/vircgroup.h
@@ -40,6 +40,7 @@ enum {
     VIR_CGROUP_CONTROLLER_BLKIO,
     VIR_CGROUP_CONTROLLER_NET_CLS,
     VIR_CGROUP_CONTROLLER_PERF_EVENT,
+    VIR_CGROUP_CONTROLLER_SYSTEMD,
 
     VIR_CGROUP_CONTROLLER_LAST
 };
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
index 20ac494..4bdd4c9 100644
--- a/tests/vircgrouptest.c
+++ b/tests/vircgrouptest.c
@@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = {
     [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
     [VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
     [VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
+    [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
 };
 const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
     [VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct",
@@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
     [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
     [VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer",
     [VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio",
+    [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd",
 };
 
 const char *links[VIR_CGROUP_CONTROLLER_LAST] = {
@@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED)
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/",
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/",
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
     };
 
     if (virCgroupNewSelf(&cgroup) < 0) {
@@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
         [VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
     };
     const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = {
         [VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition",
@@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition",
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition",
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
     };
 
     if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) {
@@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED)
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition",
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition",
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
     };
 
     if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) {
@@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition",
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition",
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
     };
 
     if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) {
@@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED)
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc",
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
     };
 
     if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) {
@@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
     };
 
     if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {
-- 
1.8.3.2