43fe83
From d6a370721e1eb2bab9b5a7d66c6e4e935c69f30b Mon Sep 17 00:00:00 2001
43fe83
Message-Id: <d6a370721e1eb2bab9b5a7d66c6e4e935c69f30b.1375465853.git.jdenemar@redhat.com>
43fe83
From: "Daniel P. Berrange" <berrange@redhat.com>
43fe83
Date: Wed, 31 Jul 2013 19:48:18 +0100
43fe83
Subject: [PATCH] Add support for systemd cgroup mount
43fe83
43fe83
https://bugzilla.redhat.com/show_bug.cgi?id=980929
43fe83
43fe83
Systemd uses a named cgroup mount for tracking processes. Add
43fe83
it as another type of controller, albeit one which we have to
43fe83
special-case in a number of places. In particular, we must
43fe83
never create/delete directories there, nor add tasks. Essentially
43fe83
the systemd mount is to be considered read-only for libvirt.
43fe83
43fe83
With this change both the virCgroupDetectPlacement and
43fe83
virCgroupCopyPlacement methods must be invoked. The copy
43fe83
placement method will copy the setup for resource controllers
43fe83
only. The detect placement method will probe for any
43fe83
named controllers, or resource controllers not already
43fe83
set up.
43fe83
43fe83
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
43fe83
43fe83
(cherry picked from commit aedd46e7e32956b4f360ba631b47064c2bafcbff)
43fe83
---
43fe83
 src/util/vircgroup.c  | 68 +++++++++++++++++++++++++++++++++++++++------------
43fe83
 src/util/vircgroup.h  |  1 +
43fe83
 tests/vircgrouptest.c |  9 +++++++
43fe83
 3 files changed, 63 insertions(+), 15 deletions(-)
43fe83
43fe83
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
43fe83
index f9007e3..0e8bb79 100644
43fe83
--- a/src/util/vircgroup.c
43fe83
+++ b/src/util/vircgroup.c
43fe83
@@ -57,7 +57,8 @@
43fe83
 
43fe83
 VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
43fe83
               "cpu", "cpuacct", "cpuset", "memory", "devices",
43fe83
-              "freezer", "blkio", "net_cls", "perf_event");
43fe83
+              "freezer", "blkio", "net_cls", "perf_event",
43fe83
+              "name=systemd");
43fe83
 
43fe83
 typedef enum {
43fe83
     VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
43fe83
@@ -117,6 +118,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group,
43fe83
     for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
43fe83
         char *tmp;
43fe83
 
43fe83
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
43fe83
+            continue;
43fe83
+
43fe83
         if (!group->controllers[i].placement)
43fe83
             continue;
43fe83
 
43fe83
@@ -331,6 +335,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group,
43fe83
         if (!group->controllers[i].mountPoint)
43fe83
             continue;
43fe83
 
43fe83
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
43fe83
+            continue;
43fe83
+
43fe83
         if (path[0] == '/') {
43fe83
             if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
43fe83
                 return -1;
43fe83
@@ -386,6 +393,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
43fe83
     int ret = -1;
43fe83
     char *procfile;
43fe83
 
43fe83
+    VIR_DEBUG("Detecting placement for pid %lld path %s",
43fe83
+              (unsigned long long)pid, path);
43fe83
     if (pid == -1) {
43fe83
         if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
43fe83
             goto cleanup;
43fe83
@@ -422,6 +431,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
43fe83
             const char *typestr = virCgroupControllerTypeToString(i);
43fe83
             int typelen = strlen(typestr);
43fe83
             char *tmp = controllers;
43fe83
+
43fe83
             while (tmp) {
43fe83
                 char *next = strchr(tmp, ',');
43fe83
                 int len;
43fe83
@@ -438,13 +448,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
43fe83
                  * selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo"
43fe83
                  */
43fe83
                 if (typelen == len && STREQLEN(typestr, tmp, len) &&
43fe83
-                    group->controllers[i].mountPoint != NULL) {
43fe83
-                    if (virAsprintf(&group->controllers[i].placement,
43fe83
-                                    "%s%s%s", selfpath,
43fe83
-                                    (STREQ(selfpath, "/") ||
43fe83
-                                     STREQ(path, "") ? "" : "/"),
43fe83
-                                    path) < 0)
43fe83
-                        goto cleanup;
43fe83
+                    group->controllers[i].mountPoint != NULL &&
43fe83
+                    group->controllers[i].placement == NULL) {
43fe83
+                    if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
43fe83
+                        if (VIR_STRDUP(group->controllers[i].placement,
43fe83
+                                       selfpath) < 0)
43fe83
+                            goto cleanup;
43fe83
+                    } else {
43fe83
+                        if (virAsprintf(&group->controllers[i].placement,
43fe83
+                                        "%s%s%s", selfpath,
43fe83
+                                        (STREQ(selfpath, "/") ||
43fe83
+                                         STREQ(path, "") ? "" : "/"),
43fe83
+                                        path) < 0)
43fe83
+                            goto cleanup;
43fe83
+                    }
43fe83
                 }
43fe83
 
43fe83
                 tmp = next;
43fe83
@@ -535,13 +552,16 @@ static int virCgroupDetect(virCgroupPtr group,
43fe83
         return -1;
43fe83
     }
43fe83
 
43fe83
-    if (parent || path[0] == '/') {
43fe83
-        if (virCgroupCopyPlacement(group, path, parent) < 0)
43fe83
-            return -1;
43fe83
-    } else {
43fe83
-        if (virCgroupDetectPlacement(group, pid, path) < 0)
43fe83
-            return -1;
43fe83
-    }
43fe83
+    /* In some cases we can copy part of the placement info
43fe83
+     * based on the parent cgroup...
43fe83
+     */
43fe83
+    if ((parent || path[0] == '/') &&
43fe83
+        virCgroupCopyPlacement(group, path, parent) < 0)
43fe83
+        return -1;
43fe83
+
43fe83
+    /* ... but use /proc/<pid>/cgroup to fill in the rest */
43fe83
+    if (virCgroupDetectPlacement(group, pid, path) < 0)
43fe83
+        return -1;
43fe83
 
43fe83
     /* Check that for every mounted controller, we found our placement */
43fe83
     for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
43fe83
@@ -833,6 +853,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
43fe83
     for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
43fe83
         char *path = NULL;
43fe83
 
43fe83
+        /* We must never mkdir() in systemd's hierarchy */
43fe83
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
43fe83
+            VIR_DEBUG("Not creating systemd controller group");
43fe83
+            continue;
43fe83
+        }
43fe83
+
43fe83
         /* Skip over controllers that aren't mounted */
43fe83
         if (!group->controllers[i].mountPoint) {
43fe83
             VIR_DEBUG("Skipping unmounted controller %s",
43fe83
@@ -1037,6 +1063,10 @@ int virCgroupRemove(virCgroupPtr group)
43fe83
         if (!group->controllers[i].mountPoint)
43fe83
             continue;
43fe83
 
43fe83
+        /* We must never rmdir() in systemd's hierarchy */
43fe83
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
43fe83
+            continue;
43fe83
+
43fe83
         /* Don't delete the root group, if we accidentally
43fe83
            ended up in it for some reason */
43fe83
         if (STREQ(group->controllers[i].placement, "/"))
43fe83
@@ -1076,6 +1106,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
43fe83
         if (!group->controllers[i].mountPoint)
43fe83
             continue;
43fe83
 
43fe83
+        /* We must never add tasks in systemd's hierarchy */
43fe83
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
43fe83
+            continue;
43fe83
+
43fe83
         if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0)
43fe83
             goto cleanup;
43fe83
     }
43fe83
@@ -1177,6 +1211,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
43fe83
             !dest_group->controllers[i].mountPoint)
43fe83
             continue;
43fe83
 
43fe83
+        /* We must never move tasks in systemd's hierarchy */
43fe83
+        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
43fe83
+            continue;
43fe83
+
43fe83
         /* New threads are created in the same group as their parent;
43fe83
          * but if a thread is created after we first read we aren't
43fe83
          * aware that it needs to move.  Therefore, we must iterate
43fe83
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
43fe83
index 3aaf081..e579f41 100644
43fe83
--- a/src/util/vircgroup.h
43fe83
+++ b/src/util/vircgroup.h
43fe83
@@ -40,6 +40,7 @@ enum {
43fe83
     VIR_CGROUP_CONTROLLER_BLKIO,
43fe83
     VIR_CGROUP_CONTROLLER_NET_CLS,
43fe83
     VIR_CGROUP_CONTROLLER_PERF_EVENT,
43fe83
+    VIR_CGROUP_CONTROLLER_SYSTEMD,
43fe83
 
43fe83
     VIR_CGROUP_CONTROLLER_LAST
43fe83
 };
43fe83
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
43fe83
index 20ac494..4bdd4c9 100644
43fe83
--- a/tests/vircgrouptest.c
43fe83
+++ b/tests/vircgrouptest.c
43fe83
@@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = {
43fe83
     [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
     [VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
43fe83
     [VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
43fe83
+    [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
43fe83
 };
43fe83
 const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
43fe83
     [VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct",
43fe83
@@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
43fe83
     [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
     [VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer",
43fe83
     [VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio",
43fe83
+    [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd",
43fe83
 };
43fe83
 
43fe83
 const char *links[VIR_CGROUP_CONTROLLER_LAST] = {
43fe83
@@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED)
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/",
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/",
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
43fe83
     };
43fe83
 
43fe83
     if (virCgroupNewSelf(&cgroup) < 0) {
43fe83
@@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
43fe83
     };
43fe83
     const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = {
43fe83
         [VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition",
43fe83
@@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition",
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition",
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
43fe83
     };
43fe83
 
43fe83
     if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) {
43fe83
@@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED)
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition",
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition",
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
43fe83
     };
43fe83
 
43fe83
     if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) {
43fe83
@@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition",
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition",
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
43fe83
     };
43fe83
 
43fe83
     if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) {
43fe83
@@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED)
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc",
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc",
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
43fe83
     };
43fe83
 
43fe83
     if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) {
43fe83
@@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU
43fe83
         [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
43fe83
         [VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
43fe83
         [VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
43fe83
+        [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
43fe83
     };
43fe83
 
43fe83
     if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {
43fe83
-- 
43fe83
1.8.3.2
43fe83