render / rpms / libvirt

Forked from rpms/libvirt 11 months ago
Clone
Blob Blame History Raw
From 5fb3ec71e0e67f1f1c0157c9c9038c5136820799 Mon Sep 17 00:00:00 2001
Message-Id: <5fb3ec71e0e67f1f1c0157c9c9038c5136820799@dist-git>
From: Martin Kletzander <mkletzan@redhat.com>
Date: Wed, 31 Jan 2018 16:32:29 +0100
Subject: [PATCH] resctrl: Add functions to work with resctrl allocations

With this commit we finally have a way to read and manipulate basic resctrl
settings.  Locking is done only on exposed functions that read/write from/to
resctrlfs.  Not in functions that are exposed in virresctrlpriv.h as those are
only supposed to be used from tests.

More information about how resctrl works:

  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/x86/intel_rdt_ui.txt

Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
(cherry picked from commit a64c761c275c61517360cf1f5af5461fc5b93138)
Signed-off-by: Martin Kletzander <mkletzan@redhat.com>

https://bugzilla.redhat.com/show_bug.cgi?id=1289368

Conflicts:

 - whitespace due to 3e7db8d3e8538bcd5deb1b111fb1233fc18831aa

Signed-off-by: Martin Kletzander <mkletzan@redhat.com>
---
 src/Makefile.am           |    2 +-
 src/libvirt_private.syms  |   10 +
 src/util/virresctrl.c     | 1174 ++++++++++++++++++++++++++++++++++++++++++++-
 src/util/virresctrl.h     |   49 ++
 src/util/virresctrlpriv.h |   27 ++
 5 files changed, 1260 insertions(+), 2 deletions(-)
 create mode 100644 src/util/virresctrlpriv.h

diff --git a/src/Makefile.am b/src/Makefile.am
index b74856ba78..6b24a47afe 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -167,7 +167,7 @@ UTIL_SOURCES =							\
 		util/virprocess.c util/virprocess.h		\
 		util/virqemu.c util/virqemu.h			\
 		util/virrandom.h util/virrandom.c		\
-		util/virresctrl.h util/virresctrl.c		\
+		util/virresctrl.h util/virresctrl.c util/virresctrlpriv.h \
 		util/virrotatingfile.h util/virrotatingfile.c   \
 		util/virscsi.c util/virscsi.h			\
 		util/virscsihost.c util/virscsihost.h		\
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 02bad6c0ce..ee75e09767 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2547,6 +2547,16 @@ virRandomInt;
 # util/virresctrl.h
 virCacheTypeFromString;
 virCacheTypeToString;
+virResctrlAllocAddPID;
+virResctrlAllocCreate;
+virResctrlAllocForeachSize;
+virResctrlAllocFormat;
+virResctrlAllocGetID;
+virResctrlAllocGetUnused;
+virResctrlAllocNew;
+virResctrlAllocRemove;
+virResctrlAllocSetID;
+virResctrlAllocSetSize;
 virResctrlGetInfo;
 virResctrlInfoGetCache;
 virResctrlInfoNew;
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index d439e7792c..e252aefe31 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -18,8 +18,12 @@
 
 #include <config.h>
 
-#include "virresctrl.h"
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
+#include "virresctrlpriv.h"
 #include "c-ctype.h"
 #include "count-one-bits.h"
 #include "viralloc.h"
@@ -152,6 +156,193 @@ virResctrlInfoNew(void)
 }
 
 
+/* Alloc-related definitions and AllocClass-related functions */
+
+/*
+ * virResctrlAlloc represents one allocation (in XML under cputune/cachetune and
+ * consequently a directory under /sys/fs/resctrl).  Since it can have multiple
+ * parts of multiple caches allocated it is represented as bunch of nested
+ * sparse arrays (by sparse I mean array of pointers so that each might be NULL
+ * in case there is no allocation for that particular one (level, cache, ...)).
+ *
+ * Since one allocation can be made for caches on different levels, the first
+ * nested sparse array is of types virResctrlAllocPerLevel.  For example if you
+ * have allocation for level 3 cache, there will be three NULL pointers and then
+ * allocated pointer to virResctrlAllocPerLevel.  That way you can access it by
+ * `alloc[level]` as O(1) is desired instead of crawling through normal arrays
+ * or lists in three nested loops.  The code uses a lot of direct accesses.
+ *
+ * Each virResctrlAllocPerLevel can have allocations for different cache
+ * allocation types.  You can allocate instruction cache (VIR_CACHE_TYPE_CODE),
+ * data cache (VIR_CACHE_TYPE_DATA) or unified cache (VIR_CACHE_TYPE_BOTH).
+ * Those allocations are kept in sparse array of virResctrlAllocPerType pointers.
+ *
+ * For each virResctrlAllocPerType users can request some size of the cache to
+ * be allocated.  That's what the sparse array `sizes` is for.  Non-NULL
+ * pointers represent requested size allocations.  The array is indexed by host
+ * cache id (gotten from `/sys/devices/system/cpu/cpuX/cache/indexY/id`).  Users
+ * can see this information e.g. in the output of `virsh capabilities` (for that
+ * information there's the other struct, namely `virResctrlInfo`).
+ *
+ * When allocation is being created we need to find unused part of the cache for
+ * all of them.  While doing that we store the bitmask in a sparse array of
+ * virBitmaps named `masks` indexed the same way as `sizes`.  The upper bounds
+ * of the sparse arrays are stored in nmasks or nsizes, respectively.
+ */
+typedef struct _virResctrlAllocPerType virResctrlAllocPerType;
+typedef virResctrlAllocPerType *virResctrlAllocPerTypePtr;
+struct _virResctrlAllocPerType {
+    /* There could be bool saying whether this is set or not, but since everything
+     * in virResctrlAlloc (and most of libvirt) goes with pointer arrays we would
+     * have to have one more level of allocation anyway, so this stays faithful to
+     * the concept */
+    unsigned long long **sizes;
+    size_t nsizes;
+
+    /* Mask for each cache */
+    virBitmapPtr *masks;
+    size_t nmasks;
+};
+
+typedef struct _virResctrlAllocPerLevel virResctrlAllocPerLevel;
+typedef virResctrlAllocPerLevel *virResctrlAllocPerLevelPtr;
+struct _virResctrlAllocPerLevel {
+    virResctrlAllocPerTypePtr *types; /* Indexed with enum virCacheType */
+    /* There is no `ntypes` member variable as it is always allocated for
+     * VIR_CACHE_TYPE_LAST number of items */
+};
+
+struct _virResctrlAlloc {
+    virObject parent;
+
+    virResctrlAllocPerLevelPtr *levels;
+    size_t nlevels;
+
+    /* The identifier (any unique string for now) */
+    char *id;
+    /* libvirt-generated path in /sys/fs/resctrl for this particular
+     * allocation */
+    char *path;
+};
+
+static virClassPtr virResctrlAllocClass;
+
+static void
+virResctrlAllocDispose(void *obj)
+{
+    size_t i = 0;
+    size_t j = 0;
+    size_t k = 0;
+
+    virResctrlAllocPtr resctrl = obj;
+
+    for (i = 0; i < resctrl->nlevels; i++) {
+        virResctrlAllocPerLevelPtr level = resctrl->levels[i];
+
+        if (!level)
+            continue;
+
+        for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+            virResctrlAllocPerTypePtr type = level->types[j];
+
+            if (!type)
+                continue;
+
+            for (k = 0; k < type->nsizes; k++)
+                VIR_FREE(type->sizes[k]);
+
+            for (k = 0; k < type->nmasks; k++)
+                virBitmapFree(type->masks[k]);
+
+            VIR_FREE(type->sizes);
+            VIR_FREE(type->masks);
+            VIR_FREE(type);
+        }
+        VIR_FREE(level->types);
+        VIR_FREE(level);
+    }
+
+    VIR_FREE(resctrl->id);
+    VIR_FREE(resctrl->path);
+    VIR_FREE(resctrl->levels);
+}
+
+
+static int
+virResctrlAllocOnceInit(void)
+{
+    if (!(virResctrlAllocClass = virClassNew(virClassForObject(),
+                                             "virResctrlAlloc",
+                                             sizeof(virResctrlAlloc),
+                                             virResctrlAllocDispose)))
+        return -1;
+
+    return 0;
+}
+
+
+VIR_ONCE_GLOBAL_INIT(virResctrlAlloc)
+
+
+virResctrlAllocPtr
+virResctrlAllocNew(void)
+{
+    if (virResctrlAllocInitialize() < 0)
+        return NULL;
+
+    return virObjectNew(virResctrlAllocClass);
+}
+
+
+/* Common functions */
+static int
+virResctrlLockInternal(int op)
+{
+    int fd = open(SYSFS_RESCTRL_PATH, O_DIRECTORY | O_CLOEXEC);
+
+    if (fd < 0) {
+        virReportSystemError(errno, "%s", _("Cannot open resctrlfs"));
+        return -1;
+    }
+
+    if (flock(fd, op) < 0) {
+        virReportSystemError(errno, "%s", _("Cannot lock resctrlfs"));
+        VIR_FORCE_CLOSE(fd);
+        return -1;
+    }
+
+    return fd;
+}
+
+
+static inline int
+virResctrlLockWrite(void)
+{
+    return virResctrlLockInternal(LOCK_EX);
+}
+
+
+static int
+virResctrlUnlock(int fd)
+{
+    if (fd == -1)
+        return 0;
+
+    /* The lock gets unlocked by closing the fd, which we need to do anyway in
+     * order to clean up properly */
+    if (VIR_CLOSE(fd) < 0) {
+        virReportSystemError(errno, "%s", _("Cannot close resctrlfs"));
+
+        /* Trying to save the already broken */
+        if (flock(fd, LOCK_UN) < 0)
+            virReportSystemError(errno, "%s", _("Cannot unlock resctrlfs"));
+        return -1;
+    }
+
+    return 0;
+}
+
+
 /* Info-related functions */
 static bool
 virResctrlInfoIsEmpty(virResctrlInfoPtr resctrl)
@@ -367,3 +558,984 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
     VIR_FREE(*controls);
     goto cleanup;
 }
+
+
+/* Alloc-related functions */
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl)
+{
+    size_t i = 0;
+    size_t j = 0;
+    size_t k = 0;
+
+    if (!resctrl)
+        return true;
+
+    for (i = 0; i < resctrl->nlevels; i++) {
+        virResctrlAllocPerLevelPtr a_level = resctrl->levels[i];
+
+        if (!a_level)
+            continue;
+
+        for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+            virResctrlAllocPerTypePtr a_type = a_level->types[j];
+
+            if (!a_type)
+                continue;
+
+            for (k = 0; k < a_type->nsizes; k++) {
+                if (a_type->sizes[k])
+                    return false;
+            }
+
+            for (k = 0; k < a_type->nmasks; k++) {
+                if (a_type->masks[k])
+                    return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+
+static virResctrlAllocPerTypePtr
+virResctrlAllocGetType(virResctrlAllocPtr resctrl,
+                       unsigned int level,
+                       virCacheType type)
+{
+    virResctrlAllocPerLevelPtr a_level = NULL;
+
+    if (resctrl->nlevels <= level &&
+        VIR_EXPAND_N(resctrl->levels, resctrl->nlevels, level - resctrl->nlevels + 1) < 0)
+        return NULL;
+
+    if (!resctrl->levels[level]) {
+        virResctrlAllocPerTypePtr *types = NULL;
+
+        if (VIR_ALLOC_N(types, VIR_CACHE_TYPE_LAST) < 0)
+            return NULL;
+
+        if (VIR_ALLOC(resctrl->levels[level]) < 0) {
+            VIR_FREE(types);
+            return NULL;
+        }
+        resctrl->levels[level]->types = types;
+    }
+
+    a_level = resctrl->levels[level];
+
+    if (!a_level->types[type] && VIR_ALLOC(a_level->types[type]) < 0)
+        return NULL;
+
+    return a_level->types[type];
+}
+
+
+static int
+virResctrlAllocUpdateMask(virResctrlAllocPtr resctrl,
+                          unsigned int level,
+                          virCacheType type,
+                          unsigned int cache,
+                          virBitmapPtr mask)
+{
+    virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+    if (!a_type)
+        return -1;
+
+    if (a_type->nmasks <= cache &&
+        VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+                     cache - a_type->nmasks + 1) < 0)
+        return -1;
+
+    if (!a_type->masks[cache]) {
+        a_type->masks[cache] = virBitmapNew(virBitmapSize(mask));
+
+        if (!a_type->masks[cache])
+            return -1;
+    }
+
+    return virBitmapCopy(a_type->masks[cache], mask);
+}
+
+
+static int
+virResctrlAllocUpdateSize(virResctrlAllocPtr resctrl,
+                          unsigned int level,
+                          virCacheType type,
+                          unsigned int cache,
+                          unsigned long long size)
+{
+    virResctrlAllocPerTypePtr a_type = virResctrlAllocGetType(resctrl, level, type);
+
+    if (!a_type)
+        return -1;
+
+    if (a_type->nsizes <= cache &&
+        VIR_EXPAND_N(a_type->sizes, a_type->nsizes,
+                     cache - a_type->nsizes + 1) < 0)
+        return -1;
+
+    if (!a_type->sizes[cache] && VIR_ALLOC(a_type->sizes[cache]) < 0)
+        return -1;
+
+    *(a_type->sizes[cache]) = size;
+
+    return 0;
+}
+
+
+/*
+ * Check if there is an allocation for this level/type/cache already.  Called
+ * before updating the structure.  VIR_CACHE_TYPE_BOTH collides with any type,
+ * the other types collide with itself.  This code basically checks if either:
+ * `alloc[level]->types[type]->sizes[cache]`
+ * or
+ * `alloc[level]->types[VIR_CACHE_TYPE_BOTH]->sizes[cache]`
+ * is non-NULL.  All the fuzz around it is checking for NULL pointers along
+ * the way.
+ */
+static bool
+virResctrlAllocCheckCollision(virResctrlAllocPtr alloc,
+                              unsigned int level,
+                              virCacheType type,
+                              unsigned int cache)
+{
+    virResctrlAllocPerLevelPtr a_level = NULL;
+    virResctrlAllocPerTypePtr a_type = NULL;
+
+    if (!alloc)
+        return false;
+
+    if (alloc->nlevels <= level)
+        return false;
+
+    a_level = alloc->levels[level];
+
+    if (!a_level)
+        return false;
+
+    a_type = a_level->types[VIR_CACHE_TYPE_BOTH];
+
+    /* If there is an allocation for type 'both', there can be no other
+     * allocation for the same cache */
+    if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+        return true;
+
+    if (type == VIR_CACHE_TYPE_BOTH) {
+        a_type = a_level->types[VIR_CACHE_TYPE_CODE];
+
+        if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+            return true;
+
+        a_type = a_level->types[VIR_CACHE_TYPE_DATA];
+
+        if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+            return true;
+    } else {
+        a_type = a_level->types[type];
+
+        if (a_type && a_type->nsizes > cache && a_type->sizes[cache])
+            return true;
+    }
+
+    return false;
+}
+
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+                       unsigned int level,
+                       virCacheType type,
+                       unsigned int cache,
+                       unsigned long long size)
+{
+    if (virResctrlAllocCheckCollision(resctrl, level, type, cache)) {
+        virReportError(VIR_ERR_XML_ERROR,
+                       _("Colliding cache allocations for cache "
+                         "level '%u' id '%u', type '%s'"),
+                       level, cache, virCacheTypeToString(type));
+        return -1;
+    }
+
+    return virResctrlAllocUpdateSize(resctrl, level, type, cache, size);
+}
+
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+                           virResctrlAllocForeachSizeCallback cb,
+                           void *opaque)
+{
+    int ret = 0;
+    unsigned int level = 0;
+    unsigned int type = 0;
+    unsigned int cache = 0;
+
+    if (!resctrl)
+        return 0;
+
+    for (level = 0; level < resctrl->nlevels; level++) {
+        virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+        if (!a_level)
+            continue;
+
+        for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+            virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+            if (!a_type)
+                continue;
+
+            for (cache = 0; cache < a_type->nsizes; cache++) {
+                unsigned long long *size = a_type->sizes[cache];
+
+                if (!size)
+                    continue;
+
+                ret = cb(level, type, cache, *size, opaque);
+                if (ret < 0)
+                    return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+                     const char *id)
+{
+    if (!id) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Resctrl allocation 'id' cannot be NULL"));
+        return -1;
+    }
+
+    return VIR_STRDUP(alloc->id, id);
+}
+
+
+const char *
+virResctrlAllocGetID(virResctrlAllocPtr alloc)
+{
+    return alloc->id;
+}
+
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr resctrl)
+{
+    virBuffer buf = VIR_BUFFER_INITIALIZER;
+    unsigned int level = 0;
+    unsigned int type = 0;
+    unsigned int cache = 0;
+
+    if (!resctrl)
+        return NULL;
+
+    for (level = 0; level < resctrl->nlevels; level++) {
+        virResctrlAllocPerLevelPtr a_level = resctrl->levels[level];
+
+        if (!a_level)
+            continue;
+
+        for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+            virResctrlAllocPerTypePtr a_type = a_level->types[type];
+
+            if (!a_type)
+                continue;
+
+            virBufferAsprintf(&buf, "L%u%s:", level, virResctrlTypeToString(type));
+
+            for (cache = 0; cache < a_type->nmasks; cache++) {
+                virBitmapPtr mask = a_type->masks[cache];
+                char *mask_str = NULL;
+
+                if (!mask)
+                    continue;
+
+                mask_str = virBitmapToString(mask, false, true);
+                if (!mask_str) {
+                    virBufferFreeAndReset(&buf);
+                    return NULL;
+                }
+
+                virBufferAsprintf(&buf, "%u=%s;", cache, mask_str);
+                VIR_FREE(mask_str);
+            }
+
+            virBufferTrim(&buf, ";", 1);
+            virBufferAddChar(&buf, '\n');
+        }
+    }
+
+    virBufferCheckError(&buf);
+    return virBufferContentAndReset(&buf);
+}
+
+
+static int
+virResctrlAllocParseProcessCache(virResctrlInfoPtr resctrl,
+                                 virResctrlAllocPtr alloc,
+                                 unsigned int level,
+                                 virCacheType type,
+                                 char *cache)
+{
+    char *tmp = strchr(cache, '=');
+    unsigned int cache_id = 0;
+    virBitmapPtr mask = NULL;
+    int ret = -1;
+
+    if (!tmp)
+        return 0;
+
+    *tmp = '\0';
+    tmp++;
+
+    if (virStrToLong_uip(cache, NULL, 10, &cache_id) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Invalid cache id '%s'"), cache);
+        return -1;
+    }
+
+    mask = virBitmapNewString(tmp);
+    if (!mask)
+        return -1;
+
+    virBitmapShrink(mask, resctrl->levels[level]->types[type]->bits);
+
+    if (virResctrlAllocUpdateMask(alloc, level, type, cache_id, mask) < 0)
+        goto cleanup;
+
+    ret = 0;
+ cleanup:
+    virBitmapFree(mask);
+    return ret;
+}
+
+
+static int
+virResctrlAllocParseProcessLine(virResctrlInfoPtr resctrl,
+                                virResctrlAllocPtr alloc,
+                                char *line)
+{
+    char **caches = NULL;
+    char *tmp = NULL;
+    unsigned int level = 0;
+    int type = -1;
+    size_t ncaches = 0;
+    size_t i = 0;
+    int ret = -1;
+
+    /* For no reason there can be spaces */
+    virSkipSpaces((const char **) &line);
+
+    /* Skip lines that don't concern caches, e.g. MB: etc. */
+    if (line[0] != 'L')
+        return 0;
+
+    /* And lines that we can't parse too */
+    tmp = strchr(line, ':');
+    if (!tmp)
+        return 0;
+
+    *tmp = '\0';
+    tmp++;
+
+    if (virStrToLong_uip(line + 1, &line, 10, &level) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Cannot parse resctrl schema level '%s'"),
+                       line + 1);
+        return -1;
+    }
+
+    type = virResctrlTypeFromString(line);
+    if (type < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Cannot parse resctrl schema level '%s'"),
+                       line + 1);
+        return -1;
+    }
+
+    caches = virStringSplitCount(tmp, ";", 0, &ncaches);
+    if (!caches)
+        return 0;
+
+    for (i = 0; i < ncaches; i++) {
+        if (virResctrlAllocParseProcessCache(resctrl, alloc, level, type, caches[i]) < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    virStringListFree(caches);
+    return ret;
+}
+
+
+static int
+virResctrlAllocParse(virResctrlInfoPtr resctrl,
+                     virResctrlAllocPtr alloc,
+                     const char *schemata)
+{
+    char **lines = NULL;
+    size_t nlines = 0;
+    size_t i = 0;
+    int ret = -1;
+
+    lines = virStringSplitCount(schemata, "\n", 0, &nlines);
+    for (i = 0; i < nlines; i++) {
+        if (virResctrlAllocParseProcessLine(resctrl, alloc, lines[i]) < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    virStringListFree(lines);
+    return ret;
+}
+
+
+static void
+virResctrlAllocSubtractPerType(virResctrlAllocPerTypePtr dst,
+                               virResctrlAllocPerTypePtr src)
+{
+    size_t i = 0;
+
+    if (!dst || !src)
+        return;
+
+    for (i = 0; i < dst->nmasks && i < src->nmasks; i++) {
+        if (dst->masks[i] && src->masks[i])
+            virBitmapSubtract(dst->masks[i], src->masks[i]);
+    }
+}
+
+
+static void
+virResctrlAllocSubtract(virResctrlAllocPtr dst,
+                        virResctrlAllocPtr src)
+{
+    size_t i = 0;
+    size_t j = 0;
+
+    if (!src)
+        return;
+
+    for (i = 0; i < dst->nlevels && i < src->nlevels; i++) {
+        if (dst->levels[i] && src->levels[i]) {
+            for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+                virResctrlAllocSubtractPerType(dst->levels[i]->types[j],
+                                               src->levels[i]->types[j]);
+            }
+        }
+    }
+}
+
+
+static virResctrlAllocPtr
+virResctrlAllocNewFromInfo(virResctrlInfoPtr info)
+{
+    size_t i = 0;
+    size_t j = 0;
+    size_t k = 0;
+    virResctrlAllocPtr ret = virResctrlAllocNew();
+    virBitmapPtr mask = NULL;
+
+    if (!ret)
+        return NULL;
+
+    for (i = 0; i < info->nlevels; i++) {
+        virResctrlInfoPerLevelPtr i_level = info->levels[i];
+
+        if (!i_level)
+            continue;
+
+        for (j = 0; j < VIR_CACHE_TYPE_LAST; j++) {
+            virResctrlInfoPerTypePtr i_type = i_level->types[j];
+
+            if (!i_type)
+                continue;
+
+            virBitmapFree(mask);
+            mask = virBitmapNew(i_type->bits);
+            if (!mask)
+                goto error;
+            virBitmapSetAll(mask);
+
+            for (k = 0; k <= i_type->max_cache_id; k++) {
+                if (virResctrlAllocUpdateMask(ret, i, j, k, mask) < 0)
+                    goto error;
+            }
+        }
+    }
+
+ cleanup:
+    virBitmapFree(mask);
+    return ret;
+ error:
+    virObjectUnref(ret);
+    ret = NULL;
+    goto cleanup;
+}
+
+/*
+ * This function creates an allocation that represents all unused parts of all
+ * caches in the system.  It uses virResctrlInfo for creating a new full
+ * allocation with all bits set (using virResctrlAllocNewFromInfo()) and then
+ * scans for all allocations under /sys/fs/resctrl and subtracts each one of
+ * them from it.  That way it can then return an allocation with only bit set
+ * being those that are not mentioned in any other allocation.  It is used for
+ * two things, a) calculating the masks when creating allocations and b) from
+ * tests.
+ */
+virResctrlAllocPtr
+virResctrlAllocGetUnused(virResctrlInfoPtr resctrl)
+{
+    virResctrlAllocPtr ret = NULL;
+    virResctrlAllocPtr alloc = NULL;
+    struct dirent *ent = NULL;
+    DIR *dirp = NULL;
+    char *schemata = NULL;
+    int rv = -1;
+
+    if (virResctrlInfoIsEmpty(resctrl)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("Resource control is not supported on this host"));
+        return NULL;
+    }
+
+    ret = virResctrlAllocNewFromInfo(resctrl);
+    if (!ret)
+        return NULL;
+
+    if (virFileReadValueString(&schemata,
+                               SYSFS_RESCTRL_PATH
+                               "/schemata") < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Could not read schemata file for the default group"));
+        goto error;
+    }
+
+    alloc = virResctrlAllocNew();
+    if (!alloc)
+        goto error;
+
+    if (virResctrlAllocParse(resctrl, alloc, schemata) < 0)
+        goto error;
+
+    virResctrlAllocSubtract(ret, alloc);
+
+    if (virDirOpen(&dirp, SYSFS_RESCTRL_PATH) < 0)
+        goto error;
+
+    while ((rv = virDirRead(dirp, &ent, SYSFS_RESCTRL_PATH)) > 0) {
+        if (ent->d_type != DT_DIR)
+            continue;
+
+        if (STREQ(ent->d_name, "info"))
+            continue;
+
+        VIR_FREE(schemata);
+        rv = virFileReadValueString(&schemata,
+                                    SYSFS_RESCTRL_PATH
+                                    "/%s/schemata",
+                                    ent->d_name);
+        if (rv == -2)
+            continue;
+
+        if (rv < 0) {
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("Could not read schemata file for group %s"),
+                           ent->d_name);
+            goto error;
+        }
+
+        virObjectUnref(alloc);
+        alloc = virResctrlAllocNew();
+        if (!alloc)
+            goto error;
+
+        if (virResctrlAllocParse(resctrl, alloc, schemata) < 0)
+            goto error;
+
+        virResctrlAllocSubtract(ret, alloc);
+    }
+    if (rv < 0)
+        goto error;
+
+ cleanup:
+    virObjectUnref(alloc);
+    VIR_DIR_CLOSE(dirp);
+    VIR_FREE(schemata);
+    return ret;
+
+ error:
+    virObjectUnref(ret);
+    ret = NULL;
+    goto cleanup;
+}
+
+
+
+static int
+virResctrlAllocSetMask(virResctrlAllocPerTypePtr a_type,
+                       unsigned int cache,
+                       virBitmapPtr mask)
+{
+    if (a_type->nmasks <= cache &&
+        VIR_EXPAND_N(a_type->masks, a_type->nmasks,
+                     cache - a_type->nmasks + 1) < 0)
+        return -1;
+
+    a_type->masks[cache] = mask;
+
+    return 0;
+}
+
+
+/*
+ * Given the information about requested allocation type `a_type`, the host
+ * cache for a particular type `i_type` and unused bits in the system `f_type`
+ * this function tries to find the smallest free space in which the allocation
+ * for cache id `cache` would fit.  We're looking for the smallest place in
+ * order to minimize fragmentation and maximize the possibility of succeeding.
+ */
+static int
+virResctrlAllocFindUnused(virResctrlAllocPerTypePtr a_type,
+                          virResctrlInfoPerTypePtr i_type,
+                          virResctrlAllocPerTypePtr f_type,
+                          unsigned int level,
+                          unsigned int type,
+                          unsigned int cache)
+{
+    unsigned long long *size = a_type->sizes[cache];
+    virBitmapPtr a_mask = NULL;
+    virBitmapPtr f_mask = NULL;
+    unsigned long long need_bits;
+    size_t i = 0;
+    ssize_t pos = -1;
+    ssize_t last_bits = 0;
+    ssize_t last_pos = -1;
+
+    /* If there is no reservation requested we need to set all bits.  That's due
+     * to weird interface of the resctrl sysfs.  It's also the reason why we
+     * cannot reserve the whole cache in one allocation. */
+    if (!size) {
+        a_mask = virBitmapNew(i_type->bits);
+        if (!a_mask)
+            return -1;
+
+        virBitmapSetAll(a_mask);
+
+        if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) {
+            virBitmapFree(a_mask);
+            return -1;
+        }
+
+        return 0;
+    }
+
+    if (cache >= f_type->nmasks) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Cache with id %u does not exists for level %d"),
+                       cache, level);
+        return -1;
+    }
+
+    f_mask = f_type->masks[cache];
+    if (!f_mask) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Cache level %d id %u does not support tuning for "
+                         "scope type '%s'"),
+                       level, cache, virCacheTypeToString(type));
+        return -1;
+    }
+
+    if (*size == i_type->size) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Cache allocation for the whole cache is not "
+                         "possible, specify size smaller than %llu"),
+                       i_type->size);
+        return -1;
+    }
+
+    need_bits = *size / i_type->control.granularity;
+
+    if (*size % i_type->control.granularity) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Cache allocation of size %llu is not "
+                         "divisible by granularity %llu"),
+                       *size, i_type->control.granularity);
+        return -1;
+    }
+
+    if (need_bits < i_type->min_cbm_bits) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Cache allocation of size %llu is smaller "
+                         "than the minimum allowed allocation %llu"),
+                       *size,
+                       i_type->control.granularity * i_type->min_cbm_bits);
+        return -1;
+    }
+
+    while ((pos = virBitmapNextSetBit(f_mask, pos)) >= 0) {
+        ssize_t pos_clear = virBitmapNextClearBit(f_mask, pos);
+        ssize_t bits;
+
+        if (pos_clear < 0)
+            pos_clear = virBitmapSize(f_mask);
+
+        bits = pos_clear - pos;
+
+        /* Not enough bits, move on and skip all of them */
+        if (bits < need_bits) {
+            pos = pos_clear;
+            continue;
+        }
+
+        /* This fits perfectly */
+        if (bits == need_bits) {
+            last_pos = pos;
+            break;
+        }
+
+        /* Remember the smaller region if we already found on before */
+        if (last_pos < 0 || (last_bits && bits < last_bits)) {
+            last_bits = bits;
+            last_pos = pos;
+        }
+
+        pos = pos_clear;
+    }
+
+    if (last_pos < 0) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Not enough room for allocation of "
+                         "%llu bytes for level %u cache %u "
+                         "scope type '%s'"),
+                       *size, level, cache,
+                       virCacheTypeToString(type));
+        return -1;
+    }
+
+    a_mask = virBitmapNew(i_type->bits);
+    if (!a_mask)
+        return -1;
+
+    for (i = last_pos; i < last_pos + need_bits; i++) {
+        ignore_value(virBitmapSetBit(a_mask, i));
+        ignore_value(virBitmapClearBit(f_mask, i));
+    }
+
+    if (virResctrlAllocSetMask(a_type, cache, a_mask) < 0) {
+        virBitmapFree(a_mask);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * This function is called when creating an allocation in the system.  What it
+ * does is that it gets all the unused bits using virResctrlAllocGetUnused() and
+ * then tries to find a proper space for every requested allocation effectively
+ * transforming `sizes` into `masks`.
+ */
+static int
+virResctrlAllocMasksAssign(virResctrlInfoPtr resctrl,
+                           virResctrlAllocPtr alloc)
+{
+    int ret = -1;
+    unsigned int level = 0;
+    virResctrlAllocPtr alloc_free = NULL;
+
+    alloc_free = virResctrlAllocGetUnused(resctrl);
+    if (!alloc_free)
+        return -1;
+
+    for (level = 0; level < alloc->nlevels; level++) {
+        virResctrlAllocPerLevelPtr a_level = alloc->levels[level];
+        virResctrlAllocPerLevelPtr f_level = NULL;
+        unsigned int type = 0;
+
+        if (!a_level)
+            continue;
+
+        if (level < alloc_free->nlevels)
+            f_level = alloc_free->levels[level];
+
+        if (!f_level) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Cache level %d does not support tuning"),
+                           level);
+            goto cleanup;
+        }
+
+        for (type = 0; type < VIR_CACHE_TYPE_LAST; type++) {
+            virResctrlAllocPerTypePtr a_type = a_level->types[type];
+            virResctrlAllocPerTypePtr f_type = f_level->types[type];
+            unsigned int cache = 0;
+
+            if (!a_type)
+                continue;
+
+            if (!f_type) {
+                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                               _("Cache level %d does not support tuning for "
+                                 "scope type '%s'"),
+                               level, virCacheTypeToString(type));
+                goto cleanup;
+            }
+
+            for (cache = 0; cache < a_type->nsizes; cache++) {
+                virResctrlInfoPerLevelPtr i_level = resctrl->levels[level];
+                virResctrlInfoPerTypePtr i_type = i_level->types[type];
+
+                if (virResctrlAllocFindUnused(a_type, i_type, f_type, level, type, cache) < 0)
+                    goto cleanup;
+            }
+        }
+    }
+
+    ret = 0;
+ cleanup:
+    virObjectUnref(alloc_free);
+    return ret;
+}
+
+
+/* This checks if the directory for the alloc exists.  If not it tries to create
+ * it and apply appropriate alloc settings. */
+int
+virResctrlAllocCreate(virResctrlInfoPtr resctrl,
+                      virResctrlAllocPtr alloc,
+                      const char *machinename)
+{
+    char *schemata_path = NULL;
+    char *alloc_str = NULL;
+    int ret = -1;
+    int lockfd = -1;
+
+    if (!alloc)
+        return 0;
+
+    if (!resctrl) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("Resource control is not supported on this host"));
+        return -1;
+    }
+
+    if (!alloc->id) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Resctrl Allocation ID must be set before creation"));
+        return -1;
+    }
+
+    if (!alloc->path &&
+        virAsprintf(&alloc->path, "%s/%s-%s",
+                    SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0)
+        return -1;
+
+    if (virFileExists(alloc->path)) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Path '%s' for resctrl allocation exists"),
+                       alloc->path);
+        goto cleanup;
+    }
+
+    lockfd = virResctrlLockWrite();
+    if (lockfd < 0)
+        goto cleanup;
+
+    if (virResctrlAllocMasksAssign(resctrl, alloc) < 0)
+        goto cleanup;
+
+    alloc_str = virResctrlAllocFormat(alloc);
+    if (!alloc_str)
+        goto cleanup;
+
+    if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
+        goto cleanup;
+
+    if (virFileMakePath(alloc->path) < 0) {
+        virReportSystemError(errno,
+                             _("Cannot create resctrl directory '%s'"),
+                             alloc->path);
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Writing resctrl schemata '%s' into '%s'", alloc_str, schemata_path);
+    if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
+        rmdir(alloc->path);
+        virReportSystemError(errno,
+                             _("Cannot write into schemata file '%s'"),
+                             schemata_path);
+        goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    virResctrlUnlock(lockfd);
+    VIR_FREE(alloc_str);
+    VIR_FREE(schemata_path);
+    return ret;
+}
+
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+                      pid_t pid)
+{
+    char *tasks = NULL;
+    char *pidstr = NULL;
+    int ret = 0;
+
+    if (!alloc->path) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Cannot add pid to non-existing resctrl allocation"));
+        return -1;
+    }
+
+    if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+        return -1;
+
+    if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
+        goto cleanup;
+
+    if (virFileWriteStr(tasks, pidstr, 0) < 0) {
+        virReportSystemError(errno,
+                             _("Cannot write pid in tasks file '%s'"),
+                             tasks);
+        goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    VIR_FREE(tasks);
+    VIR_FREE(pidstr);
+    return ret;
+}
+
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc)
+{
+    int ret = 0;
+
+    if (!alloc->path)
+        return 0;
+
+    VIR_DEBUG("Removing resctrl allocation %s", alloc->path);
+    if (rmdir(alloc->path) != 0 && errno != ENOENT) {
+        ret = -errno;
+        VIR_ERROR(_("Unable to remove %s (%d)"), alloc->path, errno);
+    }
+
+    return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index a81ff687de..63a954feaa 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -65,4 +65,53 @@ virResctrlInfoGetCache(virResctrlInfoPtr resctrl,
                        size_t *ncontrols,
                        virResctrlInfoPerCachePtr **controls);
 
+/* Alloc-related things */
+typedef struct _virResctrlAlloc virResctrlAlloc;
+typedef virResctrlAlloc *virResctrlAllocPtr;
+
+typedef int virResctrlAllocForeachSizeCallback(unsigned int level,
+                                               virCacheType type,
+                                               unsigned int cache,
+                                               unsigned long long size,
+                                               void *opaque);
+
+virResctrlAllocPtr
+virResctrlAllocNew(void);
+
+bool
+virResctrlAllocIsEmpty(virResctrlAllocPtr resctrl);
+
+int
+virResctrlAllocSetSize(virResctrlAllocPtr resctrl,
+                       unsigned int level,
+                       virCacheType type,
+                       unsigned int cache,
+                       unsigned long long size);
+
+int
+virResctrlAllocForeachSize(virResctrlAllocPtr resctrl,
+                           virResctrlAllocForeachSizeCallback cb,
+                           void *opaque);
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+                     const char *id);
+const char *
+virResctrlAllocGetID(virResctrlAllocPtr alloc);
+
+char *
+virResctrlAllocFormat(virResctrlAllocPtr alloc);
+
+int
+virResctrlAllocCreate(virResctrlInfoPtr r_info,
+                      virResctrlAllocPtr alloc,
+                      const char *machinename);
+
+int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+                      pid_t pid);
+
+int
+virResctrlAllocRemove(virResctrlAllocPtr alloc);
+
 #endif /*  __VIR_RESCTRL_H__ */
diff --git a/src/util/virresctrlpriv.h b/src/util/virresctrlpriv.h
new file mode 100644
index 0000000000..72b115afd1
--- /dev/null
+++ b/src/util/virresctrlpriv.h
@@ -0,0 +1,27 @@
+/*
+ * virresctrlpriv.h:
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VIR_RESCTRL_PRIV_H__
+# define __VIR_RESCTRL_PRIV_H__
+
+# include "virresctrl.h"
+
+virResctrlAllocPtr
+virResctrlAllocGetUnused(virResctrlInfoPtr resctrl);
+
+#endif /* __VIR_RESCTRL_PRIV_H__ */
-- 
2.16.1