|
|
43fe83 |
From d6a370721e1eb2bab9b5a7d66c6e4e935c69f30b Mon Sep 17 00:00:00 2001
|
|
|
43fe83 |
Message-Id: <d6a370721e1eb2bab9b5a7d66c6e4e935c69f30b.1375465853.git.jdenemar@redhat.com>
|
|
|
43fe83 |
From: "Daniel P. Berrange" <berrange@redhat.com>
|
|
|
43fe83 |
Date: Wed, 31 Jul 2013 19:48:18 +0100
|
|
|
43fe83 |
Subject: [PATCH] Add support for systemd cgroup mount
|
|
|
43fe83 |
|
|
|
43fe83 |
https://bugzilla.redhat.com/show_bug.cgi?id=980929
|
|
|
43fe83 |
|
|
|
43fe83 |
Systemd uses a named cgroup mount for tracking processes. Add
|
|
|
43fe83 |
it as another type of controller, albeit one which we have to
|
|
|
43fe83 |
special-case in a number of places. In particular we must
|
|
|
43fe83 |
never create/delete directories there, nor add tasks. Essentially
|
|
|
43fe83 |
the systemd mount is to be considered read-only for libvirt.
|
|
|
43fe83 |
|
|
|
43fe83 |
With this change both the virCgroupDetectPlacement and
|
|
|
43fe83 |
virCgroupCopyPlacement methods must be invoked. The copy
|
|
|
43fe83 |
placement method will copy setup for resource controllers
|
|
|
43fe83 |
only. The detect placement method will probe for any
|
|
|
43fe83 |
named controllers, or resource controllers not already
|
|
|
43fe83 |
set up.
|
|
|
43fe83 |
|
|
|
43fe83 |
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
|
|
|
43fe83 |
|
|
|
43fe83 |
(cherry picked from commit aedd46e7e32956b4f360ba631b47064c2bafcbff)
|
|
|
43fe83 |
---
|
|
|
43fe83 |
src/util/vircgroup.c | 68 +++++++++++++++++++++++++++++++++++++++------------
|
|
|
43fe83 |
src/util/vircgroup.h | 1 +
|
|
|
43fe83 |
tests/vircgrouptest.c | 9 +++++++
|
|
|
43fe83 |
3 files changed, 63 insertions(+), 15 deletions(-)
|
|
|
43fe83 |
|
|
|
43fe83 |
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
|
|
|
43fe83 |
index f9007e3..0e8bb79 100644
|
|
|
43fe83 |
--- a/src/util/vircgroup.c
|
|
|
43fe83 |
+++ b/src/util/vircgroup.c
|
|
|
43fe83 |
@@ -57,7 +57,8 @@
|
|
|
43fe83 |
|
|
|
43fe83 |
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
|
|
|
43fe83 |
"cpu", "cpuacct", "cpuset", "memory", "devices",
|
|
|
43fe83 |
- "freezer", "blkio", "net_cls", "perf_event");
|
|
|
43fe83 |
+ "freezer", "blkio", "net_cls", "perf_event",
|
|
|
43fe83 |
+ "name=systemd");
|
|
|
43fe83 |
|
|
|
43fe83 |
typedef enum {
|
|
|
43fe83 |
VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
|
|
|
43fe83 |
@@ -117,6 +118,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group,
|
|
|
43fe83 |
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
|
|
43fe83 |
char *tmp;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
|
|
|
43fe83 |
+ continue;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
if (!group->controllers[i].placement)
|
|
|
43fe83 |
continue;
|
|
|
43fe83 |
|
|
|
43fe83 |
@@ -331,6 +335,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group,
|
|
|
43fe83 |
if (!group->controllers[i].mountPoint)
|
|
|
43fe83 |
continue;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
|
|
|
43fe83 |
+ continue;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
if (path[0] == '/') {
|
|
|
43fe83 |
if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
|
|
|
43fe83 |
return -1;
|
|
|
43fe83 |
@@ -386,6 +393,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
|
|
|
43fe83 |
int ret = -1;
|
|
|
43fe83 |
char *procfile;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ VIR_DEBUG("Detecting placement for pid %lld path %s",
|
|
|
43fe83 |
+ (unsigned long long)pid, path);
|
|
|
43fe83 |
if (pid == -1) {
|
|
|
43fe83 |
if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
|
|
|
43fe83 |
goto cleanup;
|
|
|
43fe83 |
@@ -422,6 +431,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
|
|
|
43fe83 |
const char *typestr = virCgroupControllerTypeToString(i);
|
|
|
43fe83 |
int typelen = strlen(typestr);
|
|
|
43fe83 |
char *tmp = controllers;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
while (tmp) {
|
|
|
43fe83 |
char *next = strchr(tmp, ',');
|
|
|
43fe83 |
int len;
|
|
|
43fe83 |
@@ -438,13 +448,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
|
|
|
43fe83 |
* selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo"
|
|
|
43fe83 |
*/
|
|
|
43fe83 |
if (typelen == len && STREQLEN(typestr, tmp, len) &&
|
|
|
43fe83 |
- group->controllers[i].mountPoint != NULL) {
|
|
|
43fe83 |
- if (virAsprintf(&group->controllers[i].placement,
|
|
|
43fe83 |
- "%s%s%s", selfpath,
|
|
|
43fe83 |
- (STREQ(selfpath, "/") ||
|
|
|
43fe83 |
- STREQ(path, "") ? "" : "/"),
|
|
|
43fe83 |
- path) < 0)
|
|
|
43fe83 |
- goto cleanup;
|
|
|
43fe83 |
+ group->controllers[i].mountPoint != NULL &&
|
|
|
43fe83 |
+ group->controllers[i].placement == NULL) {
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
|
|
|
43fe83 |
+ if (VIR_STRDUP(group->controllers[i].placement,
|
|
|
43fe83 |
+ selfpath) < 0)
|
|
|
43fe83 |
+ goto cleanup;
|
|
|
43fe83 |
+ } else {
|
|
|
43fe83 |
+ if (virAsprintf(&group->controllers[i].placement,
|
|
|
43fe83 |
+ "%s%s%s", selfpath,
|
|
|
43fe83 |
+ (STREQ(selfpath, "/") ||
|
|
|
43fe83 |
+ STREQ(path, "") ? "" : "/"),
|
|
|
43fe83 |
+ path) < 0)
|
|
|
43fe83 |
+ goto cleanup;
|
|
|
43fe83 |
+ }
|
|
|
43fe83 |
}
|
|
|
43fe83 |
|
|
|
43fe83 |
tmp = next;
|
|
|
43fe83 |
@@ -535,13 +552,16 @@ static int virCgroupDetect(virCgroupPtr group,
|
|
|
43fe83 |
return -1;
|
|
|
43fe83 |
}
|
|
|
43fe83 |
|
|
|
43fe83 |
- if (parent || path[0] == '/') {
|
|
|
43fe83 |
- if (virCgroupCopyPlacement(group, path, parent) < 0)
|
|
|
43fe83 |
- return -1;
|
|
|
43fe83 |
- } else {
|
|
|
43fe83 |
- if (virCgroupDetectPlacement(group, pid, path) < 0)
|
|
|
43fe83 |
- return -1;
|
|
|
43fe83 |
- }
|
|
|
43fe83 |
+ /* In some cases we can copy part of the placement info
|
|
|
43fe83 |
+ * based on the parent cgroup...
|
|
|
43fe83 |
+ */
|
|
|
43fe83 |
+ if ((parent || path[0] == '/') &&
|
|
|
43fe83 |
+ virCgroupCopyPlacement(group, path, parent) < 0)
|
|
|
43fe83 |
+ return -1;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
+ /* ... but use the process's cgroup file (e.g. /proc/self/cgroup) to fill in the rest */
|
|
|
43fe83 |
+ if (virCgroupDetectPlacement(group, pid, path) < 0)
|
|
|
43fe83 |
+ return -1;
|
|
|
43fe83 |
|
|
|
43fe83 |
/* Check that for every mounted controller, we found our placement */
|
|
|
43fe83 |
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
|
|
43fe83 |
@@ -833,6 +853,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
|
|
|
43fe83 |
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
|
|
|
43fe83 |
char *path = NULL;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ /* We must never mkdir() in systemd's hierarchy */
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
|
|
|
43fe83 |
+ VIR_DEBUG("Not creating systemd controller group");
|
|
|
43fe83 |
+ continue;
|
|
|
43fe83 |
+ }
|
|
|
43fe83 |
+
|
|
|
43fe83 |
/* Skip over controllers that aren't mounted */
|
|
|
43fe83 |
if (!group->controllers[i].mountPoint) {
|
|
|
43fe83 |
VIR_DEBUG("Skipping unmounted controller %s",
|
|
|
43fe83 |
@@ -1037,6 +1063,10 @@ int virCgroupRemove(virCgroupPtr group)
|
|
|
43fe83 |
if (!group->controllers[i].mountPoint)
|
|
|
43fe83 |
continue;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ /* We must never rmdir() in systemd's hierarchy */
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
|
|
|
43fe83 |
+ continue;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
/* Don't delete the root group, if we accidentally
|
|
|
43fe83 |
ended up in it for some reason */
|
|
|
43fe83 |
if (STREQ(group->controllers[i].placement, "/"))
|
|
|
43fe83 |
@@ -1076,6 +1106,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
|
|
|
43fe83 |
if (!group->controllers[i].mountPoint)
|
|
|
43fe83 |
continue;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ /* We must never add tasks in systemd's hierarchy */
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
|
|
|
43fe83 |
+ continue;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0)
|
|
|
43fe83 |
goto cleanup;
|
|
|
43fe83 |
}
|
|
|
43fe83 |
@@ -1177,6 +1211,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
|
|
|
43fe83 |
!dest_group->controllers[i].mountPoint)
|
|
|
43fe83 |
continue;
|
|
|
43fe83 |
|
|
|
43fe83 |
+ /* We must never move tasks in systemd's hierarchy */
|
|
|
43fe83 |
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
|
|
|
43fe83 |
+ continue;
|
|
|
43fe83 |
+
|
|
|
43fe83 |
/* New threads are created in the same group as their parent;
|
|
|
43fe83 |
* but if a thread is created after we first read we aren't
|
|
|
43fe83 |
* aware that it needs to move. Therefore, we must iterate
|
|
|
43fe83 |
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
|
|
|
43fe83 |
index 3aaf081..e579f41 100644
|
|
|
43fe83 |
--- a/src/util/vircgroup.h
|
|
|
43fe83 |
+++ b/src/util/vircgroup.h
|
|
|
43fe83 |
@@ -40,6 +40,7 @@ enum {
|
|
|
43fe83 |
VIR_CGROUP_CONTROLLER_BLKIO,
|
|
|
43fe83 |
VIR_CGROUP_CONTROLLER_NET_CLS,
|
|
|
43fe83 |
VIR_CGROUP_CONTROLLER_PERF_EVENT,
|
|
|
43fe83 |
+ VIR_CGROUP_CONTROLLER_SYSTEMD,
|
|
|
43fe83 |
|
|
|
43fe83 |
VIR_CGROUP_CONTROLLER_LAST
|
|
|
43fe83 |
};
|
|
|
43fe83 |
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
|
|
|
43fe83 |
index 20ac494..4bdd4c9 100644
|
|
|
43fe83 |
--- a/tests/vircgrouptest.c
|
|
|
43fe83 |
+++ b/tests/vircgrouptest.c
|
|
|
43fe83 |
@@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = {
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
|
|
|
43fe83 |
};
|
|
|
43fe83 |
const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct",
|
|
|
43fe83 |
@@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
const char *links[VIR_CGROUP_CONTROLLER_LAST] = {
|
|
|
43fe83 |
@@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED)
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
if (virCgroupNewSelf(&cgroup) < 0) {
|
|
|
43fe83 |
@@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
|
|
|
43fe83 |
};
|
|
|
43fe83 |
const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = {
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition",
|
|
|
43fe83 |
@@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) {
|
|
|
43fe83 |
@@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED)
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) {
|
|
|
43fe83 |
@@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) {
|
|
|
43fe83 |
@@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED)
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) {
|
|
|
43fe83 |
@@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
|
|
|
43fe83 |
[VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
|
|
|
43fe83 |
+ [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
|
|
|
43fe83 |
};
|
|
|
43fe83 |
|
|
|
43fe83 |
if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {
|
|
|
43fe83 |
--
|
|
|
43fe83 |
1.8.3.2
|
|
|
43fe83 |
|