From 5fb3ec71e0e67f1f1c0157c9c9038c5136820799 Mon Sep 17 00:00:00 2001 Message-Id: <5fb3ec71e0e67f1f1c0157c9c9038c5136820799@dist-git> From: Martin Kletzander Date: Wed, 31 Jan 2018 16:32:29 +0100 Subject: [PATCH] resctrl: Add functions to work with resctrl allocations With this commit we finally have a way to read and manipulate basic resctrl settings. Locking is done only on exposed functions that read/write from/to resctrlfs. Not in functions that are exposed in virresctrlpriv.h as those are only supposed to be used from tests. More information about how resctrl works: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/x86/intel_rdt_ui.txt Signed-off-by: Martin Kletzander (cherry picked from commit a64c761c275c61517360cf1f5af5461fc5b93138) Signed-off-by: Martin Kletzander https://bugzilla.redhat.com/show_bug.cgi?id=1289368 Conflicts: - whitespace due to 3e7db8d3e8538bcd5deb1b111fb1233fc18831aa Signed-off-by: Martin Kletzander --- src/Makefile.am | 2 +- src/libvirt_private.syms | 10 + src/util/virresctrl.c | 1174 ++++++++++++++++++++++++++++++++++++++++++++- src/util/virresctrl.h | 49 ++ src/util/virresctrlpriv.h | 27 ++ 5 files changed, 1260 insertions(+), 2 deletions(-) create mode 100644 src/util/virresctrlpriv.h diff --git a/src/Makefile.am b/src/Makefile.am index b74856ba78..6b24a47afe 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -167,7 +167,7 @@ UTIL_SOURCES = \ util/virprocess.c util/virprocess.h \ util/virqemu.c util/virqemu.h \ util/virrandom.h util/virrandom.c \ - util/virresctrl.h util/virresctrl.c \ + util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \ util/virrotatingfile.h util/virrotatingfile.c \ util/virscsi.c util/virscsi.h \ util/virscsihost.c util/virscsihost.h \ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 02bad6c0ce..ee75e09767 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2547,6 +2547,16 @@ virRandomInt; # util/virresctrl.h virCacheTypeFromString; virCacheTypeToString; +virResctrlAllocAddPID; +virResctrlAllocCreate; +virResctrlAllocForeachSize; +virResctrlAllocFormat; +virResctrlAllocGetID; +virResctrlAllocGetUnused; +virResctrlAllocNew; +virResctrlAllocRemove; +virResctrlAllocSetID; +virResctrlAllocSetSize; virResctrlGetInfo; virResctrlInfoGetCache; virResctrlInfoNew; diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c index d439e7792c..e252aefe31 100644 --- a/src/util/virresctrl.c +++ b/src/util/virresctrl.c @@ -18,8 +18,12 @@ #include -#include "virresctrl.h" +#include +#include +#include +#include +#include "virresctrlpriv.h" #include "c-ctype.h" #include "count-one-bits.h" #include "viralloc.h" @@ -152,6 +156,193 @@ virResctrlInfoNew(void) } +/* Alloc-related definitions and AllocClass-related functions */ + +/* + * virResctrlAlloc represents one allocation (in XML under cputune/cachetune and + * consequently a directory under /sys/fs/resctrl). Since it can have multiple + * parts of multiple caches allocated it is represented as bunch of nested + * sparse arrays (by sparse I mean array of pointers so that each might be NULL + * in case there is no allocation for that particular one (level, cache, ...)). + * + * Since one allocation can be made for caches on different levels, the first + * nested sparse array is of types virResctrlAllocPerLevel. For example if you + * have allocation for level 3 cache, there will be three NULL pointers and then + * allocated pointer to virResctrlAllocPerLevel. That way you can access it by + * `alloc[level]` as O(1) is desired instead of crawling through normal arrays + * or lists in three nested loops. The code uses a lot of direct accesses. + * + * Each virResctrlAllocPerLevel can have allocations for different cache + * allocation types. You can allocate instruction cache (VIR_CACHE_TYPE_CODE), + * data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH). + * Those allocations are kept in sparse array of virResctrlAllocPerType pointers. + * + * For each virResctrlAllocPerType users can request some size of the cache to + * be allocated. That's what the sparse array `sizes` is for. Non-NULL + * pointers represent requested size allocations. The array is indexed by host + * cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`). Users + * can see this information e.g. in the output of `virsh capabilities` (for that + * information there's the other struct, namely `virResctrlInfo`). + * + * When allocation is being created we need to find unused part of the cache for + * all of them. While doing that we store the bitmask in a sparse array of + * virBitmaps named `masks` indexed the same way as `sizes`. The upper bounds + * of the sparse arrays are stored in nmasks or nsizes, respectively. + */ +typedef struct _virResctrlAllocPerType virResctrlAllocPerType; +typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr; +struct _virResctrlAllocPerType { + /* There could be bool saying whether this is set or not, but since everything + * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would + * have to have one more level of allocation anyway, so this stays faithful to + * the concept */ + unsigned long long **sizes; + size_t nsizes; + + /* Mask for each cache */ + virBitmapPtr *masks; + size_t nmasks; +}; + +typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel; +typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr; +struct _virResctrlAllocPerLevel { + virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */ + /* There is no `ntypes` member variable as it is always allocated for + * VIR_CACHE_TYPE_LAST number of items */ +}; + +struct _virResctrlAlloc { + virObject parent; + + virResctrlAllocPerLevelPtr *levels; + size_t nlevels; + + /* The identifier (any unique string for now) */ + char *id; + /* libvirt-generated path in /sys/fs/resctrl for this particular + * allocation */ + char *path; +}; + +static virClassPtr virResctrlAllocClass; + +static void +virResctrlAllocDispose(void *obj) +{ + size_t i = 0; + size_t j = 0; + size_t k = 0; + + virResctrlAllocPtr resctrl = obj; + + for (i = 0; i < resctrl->nlevels; i++) { + virResctrlAllocPerLevelPtr level = resctrl->levels[i]; + + if (!level) + continue; + + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) { + virResctrlAllocPerTypePtr type = level->types[j]; + + if (!type) + continue; + + for (k = 0; k < type->nsizes; k++) + VIR_FREE(type->sizes[k]); + + for (k = 0; k < type->nmasks; k++) + virBitmapFree(type->masks[k]); + + VIR_FREE(type->sizes); + VIR_FREE(type->masks); + VIR_FREE(type); + } + VIR_FREE(level->types); + VIR_FREE(level); + } + + VIR_FREE(resctrl->id); + VIR_FREE(resctrl->path); + VIR_FREE(resctrl->levels); +} + + +static int +virResctrlAllocOnceInit(void) +{ + if (!(virResctrlAllocClass = virClassNew(virClassForObject(), + "virResctrlAlloc", + sizeof(virResctrlAlloc), + virResctrlAllocDispose))) + return -1; + + return 0; +} + + +VIR_ONCE_GLOBAL_INIT(virResctrlAlloc) + + +virResctrlAllocPtr +virResctrlAllocNew(void) +{ + if (virResctrlAllocInitialize() < 0) + return NULL; + + return virObjectNew(virResctrlAllocClass); +} + + +/* Common functions */ +static int +virResctrlLockInternal(int op) +{ + int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC); + + if (fd < 0) { + virReportSystemError(errno, "%s", _("Cannot open resctrlfs")); + return -1; + } + + if (flock(fd, op) < 0) { + virReportSystemError(errno, "%s", _("Cannot lock resctrlfs")); + VIR_FORCE_CLOSE(fd); + return -1; + } + + return fd; +} + + +static inline int +virResctrlLockWrite(void) +{ + return virResctrlLockInternal(LOCK_EX); +} + + +static int +virResctrlUnlock(int fd) +{ + if (fd == -1) + return 0; + + /* The lock gets unlocked by closing the fd, which we need to do anyway in + * order to clean up properly */ + if (VIR_CLOSE(fd) < 0) { + virReportSystemError(errno, "%s", _("Cannot close resctrlfs")); + + /* Trying to save the already broken */ + if (flock(fd, LOCK_UN) < 0) + virReportSystemError(errno, "%s", _("Cannot unlock resctrlfs")); + return -1; + } + + return 0; +} + + /* Info-related functions */ static bool virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl) @@ -367,3 +558,984 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl, VIR_FREE(*controls); goto cleanup; } + + +/* Alloc-related functions */ +bool +virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl) +{ + size_t i = 0; + size_t j = 0; + size_t k = 0; + + if (!resctrl) + return true; + + for (i = 0; i < resctrl->nlevels; i++) { + virResctrlAllocPerLevelPtr a_level = resctrl->levels[i]; + + if (!a_level) + continue; + + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) { + virResctrlAllocPerTypePtr a_type = a_level->types[j]; + + if (!a_type) + continue; + + for (k = 0; k < a_type->nsizes; k++) { + if (a_type->sizes[k]) + return false; + } + + for (k = 0; k < a_type->nmasks; k++) { + if (a_type->masks[k]) + return false; + } + } + } + + return true; +} + + +static virResctrlAllocPerTypePtr +virResctrlAllocGetType(virResctrlAllocPtr resctrl, + unsigned int level, + virCacheType type) +{ + virResctrlAllocPerLevelPtr a_level = NULL; + + if (resctrl->nlevels <= level && + VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0) + return NULL; + + if (!resctrl->levels[level]) { + virResctrlAllocPerTypePtr *types = NULL; + + if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0) + return NULL; + + if (VIR_ALLOC(resctrl->levels[level]) < 0) { + VIR_FREE(types); + return NULL; + } + resctrl->levels[level]->types = types; + } + + a_level = resctrl->levels[level]; + + if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0) + return NULL; + + return a_level->types[type]; +} + + +static int +virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl, + unsigned int level, + virCacheType type, + unsigned int cache, + virBitmapPtr mask) +{ + virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type); + + if (!a_type) + return -1; + + if (a_type->nmasks <= cache && + VIR_EXPAND_N(a_type->masks, a_type->nmasks, + cache - a_type->nmasks + 1) < 0) + return -1; + + if (!a_type->masks[cache]) { + a_type->masks[cache] = virBitmapNew(virBitmapSize(mask)); + + if (!a_type->masks[cache]) + return -1; + } + + return virBitmapCopy(a_type->masks[cache], mask); +} + + +static int +virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl, + unsigned int level, + virCacheType type, + unsigned int cache, + unsigned long long size) +{ + virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type); + + if (!a_type) + return -1; + + if (a_type->nsizes <= cache && + VIR_EXPAND_N(a_type->sizes, a_type->nsizes, + cache - a_type->nsizes + 1) < 0) + return -1; + + if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0) + return -1; + + *(a_type->sizes[cache]) = size; + + return 0; +} + + +/* + * Check if there is an allocation for this level/type/cache already. Called + * before updating the structure. VIR_CACHE_TYPE_BOTH collides with any type, + * the other types collide with itself. This code basically checks if either: + * `alloc[level]->types[type]->sizes[cache]` + * or + * `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]` + * is non-NULL. All the fuzz around it is checking for NULL pointers along + * the way. + */ +static bool +virResctrlAllocCheckCollision(virResctrlAllocPtr alloc, + unsigned int level, + virCacheType type, + unsigned int cache) +{ + virResctrlAllocPerLevelPtr a_level = NULL; + virResctrlAllocPerTypePtr a_type = NULL; + + if (!alloc) + return false; + + if (alloc->nlevels <= level) + return false; + + a_level = alloc->levels[level]; + + if (!a_level) + return false; + + a_type = a_level->types[VIR_CACHE_TYPE_BOTH]; + + /* If there is an allocation for type 'both', there can be no other + * allocation for the same cache */ + if (a_type && a_type->nsizes > cache && a_type->sizes[cache]) + return true; + + if (type == VIR_CACHE_TYPE_BOTH) { + a_type = a_level->types[VIR_CACHE_TYPE_CODE]; + + if (a_type && a_type->nsizes > cache && a_type->sizes[cache]) + return true; + + a_type = a_level->types[VIR_CACHE_TYPE_DATA]; + + if (a_type && a_type->nsizes > cache && a_type->sizes[cache]) + return true; + } else { + a_type = a_level->types[type]; + + if (a_type && a_type->nsizes > cache && a_type->sizes[cache]) + return true; + } + + return false; +} + + +int +virResctrlAllocSetSize(virResctrlAllocPtr resctrl, + unsigned int level, + virCacheType type, + unsigned int cache, + unsigned long long size) +{ + if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) { + virReportError(VIR_ERR_XML_ERROR, + _("Colliding cache allocations for cache " + "level '%u' id '%u', type '%s'"), + level, cache, virCacheTypeToString(type)); + return -1; + } + + return virResctrlAllocUpdateSize(resctrl, level, type, cache, size); +} + + +int +virResctrlAllocForeachSize(virResctrlAllocPtr resctrl, + virResctrlAllocForeachSizeCallback cb, + void *opaque) +{ + int ret = 0; + unsigned int level = 0; + unsigned int type = 0; + unsigned int cache = 0; + + if (!resctrl) + return 0; + + for (level = 0; level < resctrl->nlevels; level++) { + virResctrlAllocPerLevelPtr a_level = resctrl->levels[level]; + + if (!a_level) + continue; + + for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) { + virResctrlAllocPerTypePtr a_type = a_level->types[type]; + + if (!a_type) + continue; + + for (cache = 0; cache < a_type->nsizes; cache++) { + unsigned long long *size = a_type->sizes[cache]; + + if (!size) + continue; + + ret = cb(level, type, cache, *size, opaque); + if (ret < 0) + return ret; + } + } + } + + return 0; +} + + +int +virResctrlAllocSetID(virResctrlAllocPtr alloc, + const char *id) +{ + if (!id) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Resctrl allocation 'id' cannot be NULL")); + return -1; + } + + return VIR_STRDUP(alloc->id, id); +} + + +const char * +virResctrlAllocGetID(virResctrlAllocPtr alloc) +{ + return alloc->id; +} + + +char * +virResctrlAllocFormat(virResctrlAllocPtr resctrl) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + unsigned int level = 0; + unsigned int type = 0; + unsigned int cache = 0; + + if (!resctrl) + return NULL; + + for (level = 0; level < resctrl->nlevels; level++) { + virResctrlAllocPerLevelPtr a_level = resctrl->levels[level]; + + if (!a_level) + continue; + + for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) { + virResctrlAllocPerTypePtr a_type = a_level->types[type]; + + if (!a_type) + continue; + + virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type)); + + for (cache = 0; cache < a_type->nmasks; cache++) { + virBitmapPtr mask = a_type->masks[cache]; + char *mask_str = NULL; + + if (!mask) + continue; + + mask_str = virBitmapToString(mask, false, true); + if (!mask_str) { + virBufferFreeAndReset(&buf); + return NULL; + } + + virBufferAsprintf(&buf, "%u=%s;", cache, mask_str); + VIR_FREE(mask_str); + } + + virBufferTrim(&buf, ";", 1); + virBufferAddChar(&buf, '\n'); + } + } + + virBufferCheckError(&buf); + return virBufferContentAndReset(&buf); +} + + +static int +virResctrlAllocParseProcessCache(virResctrlInfoPtr resctrl, + virResctrlAllocPtr alloc, + unsigned int level, + virCacheType type, + char *cache) +{ + char *tmp = strchr(cache, '='); + unsigned int cache_id = 0; + virBitmapPtr mask = NULL; + int ret = -1; + + if (!tmp) + return 0; + + *tmp = '\0'; + tmp++; + + if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Invalid cache id '%s'"), cache); + return -1; + } + + mask = virBitmapNewString(tmp); + if (!mask) + return -1; + + virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits); + + if (virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0) + goto cleanup; + + ret = 0; + cleanup: + virBitmapFree(mask); + return ret; +} + + +static int +virResctrlAllocParseProcessLine(virResctrlInfoPtr resctrl, + virResctrlAllocPtr alloc, + char *line) +{ + char **caches = NULL; + char *tmp = NULL; + unsigned int level = 0; + int type = -1; + size_t ncaches = 0; + size_t i = 0; + int ret = -1; + + /* For no reason there can be spaces */ + virSkipSpaces((const char **) &line); + + /* Skip lines that don't concern caches, e.g. MB: etc. */ + if (line[0] != 'L') + return 0; + + /* And lines that we can't parse too */ + tmp = strchr(line, ':'); + if (!tmp) + return 0; + + *tmp = '\0'; + tmp++; + + if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot parse resctrl schema level '%s'"), + line + 1); + return -1; + } + + type = virResctrlTypeFromString(line); + if (type < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot parse resctrl schema level '%s'"), + line + 1); + return -1; + } + + caches = virStringSplitCount(tmp, ";", 0, &ncaches); + if (!caches) + return 0; + + for (i = 0; i < ncaches; i++) { + if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, caches[i]) < 0) + goto cleanup; + } + + ret = 0; + cleanup: + virStringListFree(caches); + return ret; +} + + +static int +virResctrlAllocParse(virResctrlInfoPtr resctrl, + virResctrlAllocPtr alloc, + const char *schemata) +{ + char **lines = NULL; + size_t nlines = 0; + size_t i = 0; + int ret = -1; + + lines = virStringSplitCount(schemata, "\n", 0, &nlines); + for (i = 0; i < nlines; i++) { + if (virResctrlAllocParseProcessLine(resctrl, alloc, lines[i]) < 0) + goto cleanup; + } + + ret = 0; + cleanup: + virStringListFree(lines); + return ret; +} + + +static void +virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr dst, + virResctrlAllocPerTypePtr src) +{ + size_t i = 0; + + if (!dst || !src) + return; + + for (i = 0; i < dst->nmasks && i < src->nmasks; i++) { + if (dst->masks[i] && src->masks[i]) + virBitmapSubtract(dst->masks[i], src->masks[i]); + } +} + + +static void +virResctrlAllocSubtract(virResctrlAllocPtr dst, + virResctrlAllocPtr src) +{ + size_t i = 0; + size_t j = 0; + + if (!src) + return; + + for (i = 0; i < dst->nlevels && i < src->nlevels; i++) { + if (dst->levels[i] && src->levels[i]) { + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) { + virResctrlAllocSubtractPerType(dst->levels[i]->types[j], + src->levels[i]->types[j]); + } + } + } +} + + +static virResctrlAllocPtr +virResctrlAllocNewFromInfo(virResctrlInfoPtr info) +{ + size_t i = 0; + size_t j = 0; + size_t k = 0; + virResctrlAllocPtr ret = virResctrlAllocNew(); + virBitmapPtr mask = NULL; + + if (!ret) + return NULL; + + for (i = 0; i < info->nlevels; i++) { + virResctrlInfoPerLevelPtr i_level = info->levels[i]; + + if (!i_level) + continue; + + for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) { + virResctrlInfoPerTypePtr i_type = i_level->types[j]; + + if (!i_type) + continue; + + virBitmapFree(mask); + mask = virBitmapNew(i_type->bits); + if (!mask) + goto error; + virBitmapSetAll(mask); + + for (k = 0; k <= i_type->max_cache_id; k++) { + if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0) + goto error; + } + } + } + + cleanup: + virBitmapFree(mask); + return ret; + error: + virObjectUnref(ret); + ret = NULL; + goto cleanup; +} + +/* + * This function creates an allocation that represents all unused parts of all + * caches in the system. It uses virResctrlInfo for creating a new full + * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then + * scans for all allocations under /sys/fs/resctrl and subtracts each one of + * them from it. That way it can then return an allocation with only bit set + * being those that are not mentioned in any other allocation. It is used for + * two things, a) calculating the masks when creating allocations and b) from + * tests. + */ +virResctrlAllocPtr +virResctrlAllocGetUnused(virResctrlInfoPtr resctrl) +{ + virResctrlAllocPtr ret = NULL; + virResctrlAllocPtr alloc = NULL; + struct dirent *ent = NULL; + DIR *dirp = NULL; + char *schemata = NULL; + int rv = -1; + + if (virResctrlInfoIsEmpty(resctrl)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Resource control is not supported on this host")); + return NULL; + } + + ret = virResctrlAllocNewFromInfo(resctrl); + if (!ret) + return NULL; + + if (virFileReadValueString(&schemata, + SYSFS_RESCTRL_PATH + "/schemata") < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Could not read schemata file for the default group")); + goto error; + } + + alloc = virResctrlAllocNew(); + if (!alloc) + goto error; + + if (virResctrlAllocParse(resctrl, alloc, schemata) < 0) + goto error; + + virResctrlAllocSubtract(ret, alloc); + + if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0) + goto error; + + while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) { + if (ent->d_type != DT_DIR) + continue; + + if (STREQ(ent->d_name, "info")) + continue; + + VIR_FREE(schemata); + rv = virFileReadValueString(&schemata, + SYSFS_RESCTRL_PATH + "/%s/schemata", + ent->d_name); + if (rv == -2) + continue; + + if (rv < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Could not read schemata file for group %s"), + ent->d_name); + goto error; + } + + virObjectUnref(alloc); + alloc = virResctrlAllocNew(); + if (!alloc) + goto error; + + if (virResctrlAllocParse(resctrl, alloc, schemata) < 0) + goto error; + + virResctrlAllocSubtract(ret, alloc); + } + if (rv < 0) + goto error; + + cleanup: + virObjectUnref(alloc); + VIR_DIR_CLOSE(dirp); + VIR_FREE(schemata); + return ret; + + error: + virObjectUnref(ret); + ret = NULL; + goto cleanup; +} + + + +static int +virResctrlAllocSetMask(virResctrlAllocPerTypePtr a_type, + unsigned int cache, + virBitmapPtr mask) +{ + if (a_type->nmasks <= cache && + VIR_EXPAND_N(a_type->masks, a_type->nmasks, + cache - a_type->nmasks + 1) < 0) + return -1; + + a_type->masks[cache] = mask; + + return 0; +} + + +/* + * Given the information about requested allocation type `a_type`, the host + * cache for a particular type `i_type` and unused bits in the system `f_type` + * this function tries to find the smallest free space in which the allocation + * for cache id `cache` would fit. We're looking for the smallest place in + * order to minimize fragmentation and maximize the possibility of succeeding. + */ +static int +virResctrlAllocFindUnused(virResctrlAllocPerTypePtr a_type, + virResctrlInfoPerTypePtr i_type, + virResctrlAllocPerTypePtr f_type, + unsigned int level, + unsigned int type, + unsigned int cache) +{ + unsigned long long *size = a_type->sizes[cache]; + virBitmapPtr a_mask = NULL; + virBitmapPtr f_mask = NULL; + unsigned long long need_bits; + size_t i = 0; + ssize_t pos = -1; + ssize_t last_bits = 0; + ssize_t last_pos = -1; + + /* If there is no reservation requested we need to set all bits. That's due + * to weird interface of the resctrl sysfs. It's also the reason why we + * cannot reserve the whole cache in one allocation. */ + if (!size) { + a_mask = virBitmapNew(i_type->bits); + if (!a_mask) + return -1; + + virBitmapSetAll(a_mask); + + if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) { + virBitmapFree(a_mask); + return -1; + } + + return 0; + } + + if (cache >= f_type->nmasks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache with id %u does not exists for level %d"), + cache, level); + return -1; + } + + f_mask = f_type->masks[cache]; + if (!f_mask) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache level %d id %u does not support tuning for " + "scope type '%s'"), + level, cache, virCacheTypeToString(type)); + return -1; + } + + if (*size == i_type->size) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache allocation for the whole cache is not " + "possible, specify size smaller than %llu"), + i_type->size); + return -1; + } + + need_bits = *size / i_type->control.granularity; + + if (*size % i_type->control.granularity) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache allocation of size %llu is not " + "divisible by granularity %llu"), + *size, i_type->control.granularity); + return -1; + } + + if (need_bits < i_type->min_cbm_bits) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache allocation of size %llu is smaller " + "than the minimum allowed allocation %llu"), + *size, + i_type->control.granularity * i_type->min_cbm_bits); + return -1; + } + + while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) { + ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos); + ssize_t bits; + + if (pos_clear < 0) + pos_clear = virBitmapSize(f_mask); + + bits = pos_clear - pos; + + /* Not enough bits, move on and skip all of them */ + if (bits < need_bits) { + pos = pos_clear; + continue; + } + + /* This fits perfectly */ + if (bits == need_bits) { + last_pos = pos; + break; + } + + /* Remember the smaller region if we already found on before */ + if (last_pos < 0 || (last_bits && bits < last_bits)) { + last_bits = bits; + last_pos = pos; + } + + pos = pos_clear; + } + + if (last_pos < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Not enough room for allocation of " + "%llu bytes for level %u cache %u " + "scope type '%s'"), + *size, level, cache, + virCacheTypeToString(type)); + return -1; + } + + a_mask = virBitmapNew(i_type->bits); + if (!a_mask) + return -1; + + for (i = last_pos; i < last_pos + need_bits; i++) { + ignore_value(virBitmapSetBit(a_mask, i)); + ignore_value(virBitmapClearBit(f_mask, i)); + } + + if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) { + virBitmapFree(a_mask); + return -1; + } + + return 0; +} + + +/* + * This function is called when creating an allocation in the system. What it + * does is that it gets all the unused bits using virResctrlAllocGetUnused() and + * then tries to find a proper space for every requested allocation effectively + * transforming `sizes` into `masks`. + */ +static int +virResctrlAllocMasksAssign(virResctrlInfoPtr resctrl, + virResctrlAllocPtr alloc) +{ + int ret = -1; + unsigned int level = 0; + virResctrlAllocPtr alloc_free = NULL; + + alloc_free = virResctrlAllocGetUnused(resctrl); + if (!alloc_free) + return -1; + + for (level = 0; level < alloc->nlevels; level++) { + virResctrlAllocPerLevelPtr a_level = alloc->levels[level]; + virResctrlAllocPerLevelPtr f_level = NULL; + unsigned int type = 0; + + if (!a_level) + continue; + + if (level < alloc_free->nlevels) + f_level = alloc_free->levels[level]; + + if (!f_level) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache level %d does not support tuning"), + level); + goto cleanup; + } + + for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) { + virResctrlAllocPerTypePtr a_type = a_level->types[type]; + virResctrlAllocPerTypePtr f_type = f_level->types[type]; + unsigned int cache = 0; + + if (!a_type) + continue; + + if (!f_type) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Cache level %d does not support tuning for " + "scope type '%s'"), + level, virCacheTypeToString(type)); + goto cleanup; + } + + for (cache = 0; cache < a_type->nsizes; cache++) { + virResctrlInfoPerLevelPtr i_level = resctrl->levels[level]; + virResctrlInfoPerTypePtr i_type = i_level->types[type]; + + if (virResctrlAllocFindUnused(a_type, i_type, f_type, level, type, cache) < 0) + goto cleanup; + } + } + } + + ret = 0; + cleanup: + virObjectUnref(alloc_free); + return ret; +} + + +/* This checks if the directory for the alloc exists. If not it tries to create + * it and apply appropriate alloc settings. */ +int +virResctrlAllocCreate(virResctrlInfoPtr resctrl, + virResctrlAllocPtr alloc, + const char *machinename) +{ + char *schemata_path = NULL; + char *alloc_str = NULL; + int ret = -1; + int lockfd = -1; + + if (!alloc) + return 0; + + if (!resctrl) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Resource control is not supported on this host")); + return -1; + } + + if (!alloc->id) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Resctrl Allocation ID must be set before creation")); + return -1; + } + + if (!alloc->path && + virAsprintf(&alloc->path, "%s/%s-%s", + SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0) + return -1; + + if (virFileExists(alloc->path)) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Path '%s' for resctrl allocation exists"), + alloc->path); + goto cleanup; + } + + lockfd = virResctrlLockWrite(); + if (lockfd < 0) + goto cleanup; + + if (virResctrlAllocMasksAssign(resctrl, alloc) < 0) + goto cleanup; + + alloc_str = virResctrlAllocFormat(alloc); + if (!alloc_str) + goto cleanup; + + if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0) + goto cleanup; + + if (virFileMakePath(alloc->path) < 0) { + virReportSystemError(errno, + _("Cannot create resctrl directory '%s'"), + alloc->path); + goto cleanup; + } + + VIR_DEBUG("Writing resctrl schemata '%s' into '%s'", alloc_str, schemata_path); + if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) { + rmdir(alloc->path); + virReportSystemError(errno, + _("Cannot write into schemata file '%s'"), + schemata_path); + goto cleanup; + } + + ret = 0; + cleanup: + virResctrlUnlock(lockfd); + VIR_FREE(alloc_str); + VIR_FREE(schemata_path); + return ret; +} + + +int +virResctrlAllocAddPID(virResctrlAllocPtr alloc, + pid_t pid) +{ + char *tasks = NULL; + char *pidstr = NULL; + int ret = 0; + + if (!alloc->path) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Cannot add pid to non-existing resctrl allocation")); + return -1; + } + + if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0) + return -1; + + if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0) + goto cleanup; + + if (virFileWriteStr(tasks, pidstr, 0) < 0) { + virReportSystemError(errno, + _("Cannot write pid in tasks file '%s'"), + tasks); + goto cleanup; + } + + ret = 0; + cleanup: + VIR_FREE(tasks); + VIR_FREE(pidstr); + return ret; +} + + +int +virResctrlAllocRemove(virResctrlAllocPtr alloc) +{ + int ret = 0; + + if (!alloc->path) + return 0; + + VIR_DEBUG("Removing resctrl allocation %s", alloc->path); + if (rmdir(alloc->path) != 0 && errno != ENOENT) { + ret = -errno; + VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno); + } + + return ret; +} diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h index a81ff687de..63a954feaa 100644 --- a/src/util/virresctrl.h +++ b/src/util/virresctrl.h @@ -65,4 +65,53 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl, size_t *ncontrols, virResctrlInfoPerCachePtr **controls); +/* Alloc-related things */ +typedef struct _virResctrlAlloc virResctrlAlloc; +typedef virResctrlAlloc *virResctrlAllocPtr; + +typedef int virResctrlAllocForeachSizeCallback(unsigned int level, + virCacheType type, + unsigned int cache, + unsigned long long size, + void *opaque); + +virResctrlAllocPtr +virResctrlAllocNew(void); + +bool +virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl); + +int +virResctrlAllocSetSize(virResctrlAllocPtr resctrl, + unsigned int level, + virCacheType type, + unsigned int cache, + unsigned long long size); + +int +virResctrlAllocForeachSize(virResctrlAllocPtr resctrl, + virResctrlAllocForeachSizeCallback cb, + void *opaque); + +int +virResctrlAllocSetID(virResctrlAllocPtr alloc, + const char *id); +const char * +virResctrlAllocGetID(virResctrlAllocPtr alloc); + +char * +virResctrlAllocFormat(virResctrlAllocPtr alloc); + +int +virResctrlAllocCreate(virResctrlInfoPtr r_info, + virResctrlAllocPtr alloc, + const char *machinename); + +int +virResctrlAllocAddPID(virResctrlAllocPtr alloc, + pid_t pid); + +int +virResctrlAllocRemove(virResctrlAllocPtr alloc); + #endif /* __VIR_RESCTRL_H__ */ diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h new file mode 100644 index 0000000000..72b115afd1 --- /dev/null +++ b/src/util/virresctrlpriv.h @@ -0,0 +1,27 @@ +/* + * virresctrlpriv.h: + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * . + */ + +#ifndef __VIR_RESCTRL_PRIV_H__ +# define __VIR_RESCTRL_PRIV_H__ + +# include "virresctrl.h" + +virResctrlAllocPtr +virResctrlAllocGetUnused(virResctrlInfoPtr resctrl); + +#endif /* __VIR_RESCTRL_PRIV_H__ */ -- 2.16.1