Blob Blame History Raw
From 2a7678f0966137f6e9b226934ab8dd67d6f2e963 Mon Sep 17 00:00:00 2001
Message-Id: <2a7678f0966137f6e9b226934ab8dd67d6f2e963@dist-git>
From: Michal Privoznik <mprivozn@redhat.com>
Date: Mon, 9 Apr 2018 15:46:50 +0200
Subject: [PATCH] qemu_cgroup: Handle device mapper targets properly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RHEL-7.6: https://bugzilla.redhat.com/show_bug.cgi?id=1557769
RHEL-7.5.z: https://bugzilla.redhat.com/show_bug.cgi?id=1564996

Problem with device mapper targets is that there can be several
other devices 'hidden' behind them. For instance, /dev/dm-1 can
consist of /dev/sda, /dev/sdb and /dev/sdc. Therefore, when
setting up devices CGroup and namespaces we have to take this
into account.

This bug was exposed after Linux kernel was fixed. Initially,
kernel used different functions for getting block device in
open() and ioctl(). While CGroup permissions were checked in the
former case, due to a bug in kernel they were not checked in the
latter case. This changed with the upstream commit of
519049afead4f7c3e6446028c41e99fde958cc04 (v4.16-rc5~11^2~4).

Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
(cherry picked from commit 6dd84f6850ca4379203d1e7b999430ed59041208)
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
Reviewed-by: Ján Tomko <jtomko@redhat.com>
---
 src/qemu/qemu_cgroup.c | 46 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index b604edb31c..d88eb7881f 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -37,6 +37,7 @@
 #include "virtypedparam.h"
 #include "virnuma.h"
 #include "virsystemd.h"
+#include "virdevmapper.h"
 
 #define VIR_FROM_THIS VIR_FROM_QEMU
 
@@ -60,7 +61,10 @@ qemuSetupImagePathCgroup(virDomainObjPtr vm,
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
     int perms = VIR_CGROUP_DEVICE_READ;
-    int ret;
+    char **targetPaths = NULL;
+    size_t i;
+    int rv;
+    int ret = -1;
 
     if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
         return 0;
@@ -71,12 +75,41 @@ qemuSetupImagePathCgroup(virDomainObjPtr vm,
     VIR_DEBUG("Allow path %s, perms: %s",
               path, virCgroupGetDevicePermsString(perms));
 
-    ret = virCgroupAllowDevicePath(priv->cgroup, path, perms, true);
+    rv = virCgroupAllowDevicePath(priv->cgroup, path, perms, true);
 
     virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path,
                              virCgroupGetDevicePermsString(perms),
-                             ret);
+                             rv);
+    if (rv < 0)
+        goto cleanup;
 
+    if (rv > 0) {
+        /* @path is neither character device nor block device. */
+        ret = 0;
+        goto cleanup;
+    }
+
+    if (virDevMapperGetTargets(path, &targetPaths) < 0 &&
+        errno != ENOSYS && errno != EBADF) {
+        virReportSystemError(errno,
+                             _("Unable to get devmapper targets for %s"),
+                             path);
+        goto cleanup;
+    }
+
+    for (i = 0; targetPaths && targetPaths[i]; i++) {
+        rv = virCgroupAllowDevicePath(priv->cgroup, targetPaths[i], perms, false);
+
+        virDomainAuditCgroupPath(vm, priv->cgroup, "allow", targetPaths[i],
+                                 virCgroupGetDevicePermsString(perms),
+                                 rv);
+        if (rv < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    virStringListFree(targetPaths);
     return ret;
 }
 
@@ -131,6 +164,13 @@ qemuTeardownImageCgroup(virDomainObjPtr vm,
     virDomainAuditCgroupPath(vm, priv->cgroup, "deny", src->path,
                              virCgroupGetDevicePermsString(perms), ret);
 
+    /* If you're looking for a counter part to
+     * qemuSetupImagePathCgroup you're at the right place.
+     * However, we can't just blindly deny all the device mapper
+     * targets of src->path because they might still be used by
+     * another disk in domain. Just like we are not removing
+     * disks from namespace. */
+
     return ret;
 }
 
-- 
2.17.0