From 2593f2e4626fbb6dfef2317bceea4d1b8275f9d8 Mon Sep 17 00:00:00 2001 Message-Id: <2593f2e4626fbb6dfef2317bceea4d1b8275f9d8@dist-git> From: Pavel Hrdina Date: Fri, 19 Feb 2021 13:33:59 +0100 Subject: [PATCH] vircgroup: introduce nested cgroup to properly work with systemd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running on host with systemd we register VMs with machined. In this case systemd creates the root VM cgroup for us. This has some implications where one of them is that systemd owns all files inside the root VM cgroup and we should not touch them. We already use DBus calls for some of the APIs but for the remaining ones we will continue accessing the files directly. Systemd doesn't support threaded cgroups so we need to do this. The reason why we don't use DBus for most of the APIs is that we already have a code that works with files and we would have to check if systemd supports each API. This change introduces new topology on systemd hosts: $ROOT | +- machine.slice | +- machine-qemu\x2d1\x2dvm1.scope | +- libvirt | +- emulator +- vcpu0 +- vcpu0 compared to the previous topology: $ROOT | +- machine.slice | +- machine-qemu\x2d1\x2dvm1.scope | +- emulator +- vcpu0 +- vcpu0 Signed-off-by: Pavel Hrdina Reviewed-by: Michal Privoznik (cherry picked from commit 184245f53b94fc84f727eb6e8a2aa52df02d69c0) Conflicts: src/util/vircgroup.c - missing upstream g_free and g_autofree rewrite Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1798463 Signed-off-by: Pavel Hrdina Message-Id: <51312c8b520e4ed794f8cd8a77b77c228387bb15.1613737828.git.phrdina@redhat.com> Reviewed-by: Ján Tomko --- docs/cgroups.html.in | 29 +++-- src/util/vircgroup.c | 256 +++++++++++++++++++++++++++++++-------- src/util/vircgrouppriv.h | 4 + src/util/vircgroupv1.c | 15 ++- src/util/vircgroupv2.c | 6 + 5 files changed, 245 insertions(+), 65 deletions(-) diff --git a/docs/cgroups.html.in b/docs/cgroups.html.in index 78dede1bba..412a9360ff 
100644 --- a/docs/cgroups.html.in +++ b/docs/cgroups.html.in @@ -117,21 +117,27 @@ $ROOT | +- machine-qemu\x2d1\x2dvm1.scope | | - | +- emulator - | +- vcpu0 - | +- vcpu1 + | +- libvirt + | | + | +- emulator + | +- vcpu0 + | +- vcpu1 | +- machine-qemu\x2d2\x2dvm2.scope | | - | +- emulator - | +- vcpu0 - | +- vcpu1 + | +- libvirt + | | + | +- emulator + | +- vcpu0 + | +- vcpu1 | +- machine-qemu\x2d3\x2dvm3.scope | | - | +- emulator - | +- vcpu0 - | +- vcpu1 + | +- libvirt + | | + | +- emulator + | +- vcpu0 + | +- vcpu1 | +- machine-engineering.slice | | @@ -148,6 +154,11 @@ $ROOT +- machine-lxc\x2d33333\x2dcontainer3.scope +

+ Prior to libvirt 7.1.0 the topology doesn't have the extra + libvirt directory. +

+

Non-systemd cgroups layout

diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 8f5bcd94f4..d0f867ba7f 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -639,6 +639,22 @@ virCgroupMakeGroup(virCgroupPtr parent, } +static bool +virCgroupExists(virCgroupPtr group) +{ + size_t i; + + for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { + if (group->backends[i] && + !group->backends[i]->exists(group)) { + return false; + } + } + + return true; +} + + /** * virCgroupNew: * @path: path for the new group @@ -695,10 +711,11 @@ virCgroupAddTaskInternal(virCgroupPtr group, unsigned int flags) { size_t i; + virCgroupPtr parent = virCgroupGetNested(group); for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { - if (group->backends[i] && - group->backends[i]->addTask(group, pid, flags) < 0) { + if (parent->backends[i] && + parent->backends[i]->addTask(parent, pid, flags) < 0) { return -1; } } @@ -871,6 +888,30 @@ virCgroupNewPartition(const char *path, } +static int +virCgroupNewNested(virCgroupPtr parent, + int controllers, + bool create, + pid_t pid, + virCgroupPtr *nested) +{ + virCgroupPtr new = NULL; + + if (virCgroupNew(-1, "libvirt", parent, controllers, &new) < 0) + return -1; + + if (create) { + if (virCgroupMakeGroup(parent, new, create, pid, VIR_CGROUP_NONE) < 0) { + virCgroupFree(&new); + return -1; + } + } + + *nested = g_steal_pointer(&new); + return 0; +} + + /** * virCgroupNewSelf: * @@ -954,6 +995,7 @@ virCgroupNewThread(virCgroupPtr domain, virCgroupPtr *group) { g_autofree char *name = NULL; + virCgroupPtr parent = NULL; int controllers; switch (nameval) { @@ -976,10 +1018,12 @@ virCgroupNewThread(virCgroupPtr domain, (1 << VIR_CGROUP_CONTROLLER_CPUACCT) | (1 << VIR_CGROUP_CONTROLLER_CPUSET)); - if (virCgroupNew(-1, name, domain, controllers, group) < 0) + parent = virCgroupGetNested(domain); + + if (virCgroupNew(-1, name, parent, controllers, group) < 0) return -1; - if (virCgroupMakeGroup(domain, *group, create, -1, VIR_CGROUP_THREAD) < 0) { + if 
(virCgroupMakeGroup(parent, *group, create, -1, VIR_CGROUP_THREAD) < 0) { virCgroupFree(group); return -1; } @@ -1009,6 +1053,7 @@ virCgroupNewDetectMachine(const char *name, virCgroupPtr *group) { size_t i; + virCgroupPtr nested = NULL; if (virCgroupNewDetect(pid, controllers, group) < 0) { if (virCgroupNewIgnoreError()) @@ -1032,6 +1077,14 @@ virCgroupNewDetectMachine(const char *name, if (virSystemdHasMachined() == 0 && !(*group)->unitName) return -1; + if (virCgroupNewNested((*group), controllers, false, -1, &nested) < 0) + return -1; + + if (virCgroupExists(nested)) + (*group)->nested = g_steal_pointer(&nested); + + virCgroupFree(&nested); + return 0; } @@ -1107,6 +1160,7 @@ virCgroupNewMachineSystemd(const char *name, { int rv; virCgroupPtr init; + virCgroupPtr nested = NULL; g_autofree char *path = NULL; size_t i; @@ -1157,6 +1211,13 @@ virCgroupNewMachineSystemd(const char *name, return -1; } + if (virCgroupNewNested((*group), controllers, true, pidleader, &nested) < 0) { + virCgroupFree(group); + return -1; + } + + (*group)->nested = nested; + if (virCgroupAddProcess(*group, pidleader) < 0) { virErrorPtr saved; @@ -1349,7 +1410,9 @@ virCgroupGetBlkioIoServiced(virCgroupPtr group, long long *requests_read, long long *requests_write) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, getBlkioIoServiced, -1, bytes_read, bytes_write, requests_read, requests_write); @@ -1376,7 +1439,9 @@ virCgroupGetBlkioIoDeviceServiced(virCgroupPtr group, long long *requests_read, long long *requests_write) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, getBlkioIoDeviceServiced, -1, path, bytes_read, bytes_write, requests_read, requests_write); @@ -1427,7 +1492,9 @@ virCgroupSetBlkioDeviceReadIops(virCgroupPtr group, 
const char *path, unsigned int riops) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, setBlkioDeviceReadIops, -1, path, riops); } @@ -1445,7 +1512,9 @@ virCgroupSetBlkioDeviceWriteIops(virCgroupPtr group, const char *path, unsigned int wiops) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, setBlkioDeviceWriteIops, -1, path, wiops); } @@ -1463,7 +1532,9 @@ virCgroupSetBlkioDeviceReadBps(virCgroupPtr group, const char *path, unsigned long long rbps) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, setBlkioDeviceReadBps, -1, path, rbps); } @@ -1480,7 +1551,9 @@ virCgroupSetBlkioDeviceWriteBps(virCgroupPtr group, const char *path, unsigned long long wbps) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, setBlkioDeviceWriteBps, -1, path, wbps); } @@ -1516,7 +1589,9 @@ virCgroupGetBlkioDeviceReadIops(virCgroupPtr group, const char *path, unsigned int *riops) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, getBlkioDeviceReadIops, -1, path, riops); } @@ -1533,7 +1608,9 @@ virCgroupGetBlkioDeviceWriteIops(virCgroupPtr group, const char *path, unsigned int *wiops) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, getBlkioDeviceWriteIops, -1, path, wiops); } @@ -1550,7 +1627,9 @@ 
virCgroupGetBlkioDeviceReadBps(virCgroupPtr group, const char *path, unsigned long long *rbps) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, getBlkioDeviceReadBps, -1, path, rbps); } @@ -1567,7 +1646,9 @@ virCgroupGetBlkioDeviceWriteBps(virCgroupPtr group, const char *path, unsigned long long *wbps) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_BLKIO, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_BLKIO, getBlkioDeviceWriteBps, -1, path, wbps); } @@ -1600,7 +1681,9 @@ virCgroupGetBlkioDeviceWeight(virCgroupPtr group, int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, setMemory, -1, kb); } @@ -1627,7 +1710,9 @@ virCgroupGetMemoryStat(virCgroupPtr group, unsigned long long *inactiveFile, unsigned long long *unevictable) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, getMemoryStat, -1, cache, activeAnon, inactiveAnon, activeFile, inactiveFile, @@ -1646,7 +1731,9 @@ virCgroupGetMemoryStat(virCgroupPtr group, int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, getMemoryUsage, -1, kb); } @@ -1662,7 +1749,9 @@ virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb) int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = 
virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, setMemoryHardLimit, -1, kb); } @@ -1678,7 +1767,9 @@ virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb) int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, getMemoryHardLimit, -1, kb); } @@ -1694,7 +1785,9 @@ virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb) int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, setMemorySoftLimit, -1, kb); } @@ -1710,7 +1803,9 @@ virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb) int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, getMemorySoftLimit, -1, kb); } @@ -1726,7 +1821,9 @@ virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb) int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, setMemSwapHardLimit, -1, kb); } @@ -1742,7 +1839,9 @@ virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb) int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, 
VIR_CGROUP_CONTROLLER_MEMORY, getMemSwapHardLimit, -1, kb); } @@ -1758,7 +1857,9 @@ virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb) int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_MEMORY, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_MEMORY, getMemSwapUsage, -1, kb); } @@ -1774,7 +1875,9 @@ virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb) int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUSET, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUSET, setCpusetMems, -1, mems); } @@ -1790,7 +1893,9 @@ virCgroupSetCpusetMems(virCgroupPtr group, const char *mems) int virCgroupGetCpusetMems(virCgroupPtr group, char **mems) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUSET, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUSET, getCpusetMems, -1, mems); } @@ -1806,7 +1911,9 @@ virCgroupGetCpusetMems(virCgroupPtr group, char **mems) int virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUSET, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUSET, setCpusetMemoryMigrate, -1, migrate); } @@ -1822,7 +1929,9 @@ virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate) int virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUSET, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUSET, getCpusetMemoryMigrate, -1, migrate); } @@ -1838,7 +1947,9 @@ virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate) 
int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUSET, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUSET, setCpusetCpus, -1, cpus); } @@ -1854,7 +1965,9 @@ virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus) int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUSET, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUSET, getCpusetCpus, -1, cpus); } @@ -1869,7 +1982,9 @@ virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus) int virCgroupDenyAllDevices(virCgroupPtr group) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_DEVICES, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_DEVICES, denyAllDevices, -1); } @@ -1890,7 +2005,9 @@ virCgroupDenyAllDevices(virCgroupPtr group) int virCgroupAllowAllDevices(virCgroupPtr group, int perms) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_DEVICES, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_DEVICES, allowAllDevices, -1, perms); } @@ -1910,7 +2027,9 @@ int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor, int perms) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_DEVICES, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_DEVICES, allowDevice, -1, type, major, minor, perms); } @@ -1936,6 +2055,7 @@ virCgroupAllowDevicePath(virCgroupPtr group, bool ignoreEacces) { struct stat sb; + virCgroupPtr parent = virCgroupGetNested(group); if (stat(path, &sb) < 0) { if (errno == EACCES && ignoreEacces) @@ -1950,7 +2070,7 @@ virCgroupAllowDevicePath(virCgroupPtr group, if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) return 1; - 
VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_DEVICES, + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_DEVICES, allowDevice, -1, S_ISCHR(sb.st_mode) ? 'c' : 'b', major(sb.st_rdev), @@ -1974,7 +2094,9 @@ int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor, int perms) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_DEVICES, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_DEVICES, denyDevice, -1, type, major, minor, perms); } @@ -2000,6 +2122,7 @@ virCgroupDenyDevicePath(virCgroupPtr group, bool ignoreEacces) { struct stat sb; + virCgroupPtr parent = virCgroupGetNested(group); if (stat(path, &sb) < 0) { if (errno == EACCES && ignoreEacces) @@ -2014,7 +2137,7 @@ virCgroupDenyDevicePath(virCgroupPtr group, if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) return 1; - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_DEVICES, + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_DEVICES, denyDevice, -1, S_ISCHR(sb.st_mode) ? 
'c' : 'b', major(sb.st_rdev), @@ -2282,7 +2405,9 @@ virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares) int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPU, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPU, setCpuCfsPeriod, -1, cfs_period); } @@ -2298,7 +2423,9 @@ virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period) int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPU, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPU, getCpuCfsPeriod, -1, cfs_period); } @@ -2315,7 +2442,9 @@ virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period) int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPU, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPU, setCpuCfsQuota, -1, cfs_quota); } @@ -2323,7 +2452,9 @@ virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota) int virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUACCT, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUACCT, getCpuacctPercpuUsage, -1, usage); } @@ -2669,7 +2800,9 @@ virCgroupKillPainfully(virCgroupPtr group) int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPU, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPU, getCpuCfsQuota, -1, cfs_quota); } @@ -2677,7 +2810,9 @@ virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota) int 
virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUACCT, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUACCT, getCpuacctUsage, -1, usage); } @@ -2686,7 +2821,9 @@ int virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user, unsigned long long *sys) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_CPUACCT, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPUACCT, getCpuacctStat, -1, user, sys); } @@ -2694,7 +2831,9 @@ virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user, int virCgroupSetFreezerState(virCgroupPtr group, const char *state) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_FREEZER, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_FREEZER, setFreezerState, -1, state); } @@ -2702,7 +2841,9 @@ virCgroupSetFreezerState(virCgroupPtr group, const char *state) int virCgroupGetFreezerState(virCgroupPtr group, char **state) { - VIR_CGROUP_BACKEND_CALL(group, VIR_CGROUP_CONTROLLER_FREEZER, + virCgroupPtr parent = virCgroupGetNested(group); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_FREEZER, getFreezerState, -1, state); } @@ -2712,10 +2853,11 @@ virCgroupBindMount(virCgroupPtr group, const char *oldroot, const char *mountopts) { size_t i; + virCgroupPtr parent = virCgroupGetNested(group); for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { - if (group->backends[i] && - group->backends[i]->bindMount(group, oldroot, mountopts) < 0) { + if (parent->backends[i] && + parent->backends[i]->bindMount(parent, oldroot, mountopts) < 0) { return -1; } } @@ -2730,10 +2872,11 @@ int virCgroupSetOwner(virCgroupPtr cgroup, int controllers) { size_t i; + virCgroupPtr parent = virCgroupGetNested(cgroup); for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { - if 
(cgroup->backends[i] && - cgroup->backends[i]->setOwner(cgroup, uid, gid, controllers) < 0) { + if (parent->backends[i] && + parent->backends[i]->setOwner(parent, uid, gid, controllers) < 0) { return -1; } } @@ -2752,7 +2895,9 @@ int virCgroupSetOwner(virCgroupPtr cgroup, bool virCgroupSupportsCpuBW(virCgroupPtr cgroup) { - VIR_CGROUP_BACKEND_CALL(cgroup, VIR_CGROUP_CONTROLLER_CPU, + virCgroupPtr parent = virCgroupGetNested(cgroup); + + VIR_CGROUP_BACKEND_CALL(parent, VIR_CGROUP_CONTROLLER_CPU, supportsCpuBW, false); } @@ -2760,10 +2905,11 @@ int virCgroupHasEmptyTasks(virCgroupPtr cgroup, int controller) { size_t i; + virCgroupPtr parent = virCgroupGetNested(cgroup); for (i = 0; i < VIR_CGROUP_BACKEND_TYPE_LAST; i++) { - if (cgroup->backends[i]) { - int rc = cgroup->backends[i]->hasEmptyTasks(cgroup, controller); + if (parent->backends[i]) { + int rc = parent->backends[i]->hasEmptyTasks(parent, controller); if (rc <= 0) return rc; } @@ -3565,6 +3711,7 @@ virCgroupFree(virCgroupPtr *group) VIR_FREE((*group)->unified.mountPoint); VIR_FREE((*group)->unified.placement); VIR_FREE((*group)->unitName); + virCgroupFree(&(*group)->nested); /* nested is a whole virCgroup: free its members too, not just the struct */ VIR_FREE((*group)->path); VIR_FREE(*group); @@ -3577,9 +3724,12 @@ virCgroupDelThread(virCgroupPtr cgroup, int idx) { virCgroupPtr new_cgroup = NULL; + virCgroupPtr parent = NULL; if (cgroup) { - if (virCgroupNewThread(cgroup, nameval, idx, false, &new_cgroup) < 0) + parent = virCgroupGetNested(cgroup); + + if (virCgroupNewThread(parent, nameval, idx, false, &new_cgroup) < 0) return -1; /* Remove the offlined cgroup */ diff --git a/src/util/vircgrouppriv.h b/src/util/vircgrouppriv.h index b4a9e0b379..104d74e4d7 100644 --- a/src/util/vircgrouppriv.h +++ b/src/util/vircgrouppriv.h @@ -69,8 +69,12 @@ struct _virCgroup { virCgroupV2Controller unified; char *unitName; + virCgroupPtr nested; }; +#define virCgroupGetNested(cgroup) \ + ((cgroup)->nested ? (cgroup)->nested : (cgroup)) + #define virCgroupSetValueDBus(unitName, key, ...) 
\ ({ \ int __ret = -1; \ diff --git a/src/util/vircgroupv1.c b/src/util/vircgroupv1.c index 57d617cb69..49a2cb023e 100644 --- a/src/util/vircgroupv1.c +++ b/src/util/vircgroupv1.c @@ -338,6 +338,8 @@ virCgroupV1DetectPlacement(virCgroupPtr group, for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { const char *typestr = virCgroupV1ControllerTypeToString(i); + g_autofree char* placement = NULL; + char *tmp = NULL; if (!virCgroupV1MountOptsMatchController(controllers, typestr)) continue; @@ -348,17 +350,24 @@ virCgroupV1DetectPlacement(virCgroupPtr group, if (group->legacy[i].placement) continue; + /* On systemd we create a nested cgroup for some cgroup tasks + * but the placement should point to the root cgroup. */ + placement = g_strdup(selfpath); + tmp = g_strrstr(placement, "/libvirt"); + if (tmp) + *tmp = '\0'; + /* * selfpath == "/" + path="" -> "/" * selfpath == "/libvirt.service" + path == "" -> "/libvirt.service" * selfpath == "/libvirt.service" + path == "foo" -> "/libvirt.service/foo" */ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) { - group->legacy[i].placement = g_strdup(selfpath); + group->legacy[i].placement = g_strdup(placement); } else { - bool delim = STREQ(selfpath, "/") || STREQ(path, ""); + bool delim = STREQ(placement, "/") || STREQ(path, ""); - group->legacy[i].placement = g_strdup_printf("%s%s%s", selfpath, + group->legacy[i].placement = g_strdup_printf("%s%s%s", placement, delim ? "" : "/", path); } diff --git a/src/util/vircgroupv2.c b/src/util/vircgroupv2.c index d15e2354cf..a14fc669fb 100644 --- a/src/util/vircgroupv2.c +++ b/src/util/vircgroupv2.c @@ -210,6 +210,12 @@ virCgroupV2DetectPlacement(virCgroupPtr group, if (tmp) *tmp = '\0'; + /* On systemd we create a nested cgroup for some cgroup tasks + * but the placement should point to the root cgroup. */ + tmp = g_strrstr(placement, "/libvirt"); + if (tmp) + *tmp = '\0'; + /* * selfpath == "/" + path="" -> "/" * selfpath == "/libvirt.service" + path == "" -> "/libvirt.service" -- 2.30.0