Blame SOURCES/kvm-file-posix-Skip-effectiveless-OFD-lock-operations.patch

ae23c9
From b9aaae95df14d93aab128376c40943259a453730 Mon Sep 17 00:00:00 2001
ae23c9
From: Max Reitz <mreitz@redhat.com>
ae23c9
Date: Wed, 3 Apr 2019 17:13:10 +0100
ae23c9
Subject: [PATCH 03/11] file-posix: Skip effectiveless OFD lock operations
ae23c9
ae23c9
RH-Author: Max Reitz <mreitz@redhat.com>
ae23c9
Message-id: <20190403171315.20841-4-mreitz@redhat.com>
ae23c9
Patchwork-id: 85401
ae23c9
O-Subject: [RHEL-8.1 qemu-kvm PATCH 3/8] file-posix: Skip effectiveless OFD lock operations
ae23c9
Bugzilla: 1694148
ae23c9
RH-Acked-by: John Snow <jsnow@redhat.com>
ae23c9
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
ae23c9
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
ae23c9
ae23c9
From: Fam Zheng <famz@redhat.com>
ae23c9
ae23c9
If we know we've already locked the bytes, don't do it again; similarly
ae23c9
don't unlock a byte if we haven't locked it. This doesn't change the
ae23c9
behavior, but fixes a corner case explained below.
ae23c9
ae23c9
Libvirt had an error handling bug that an image can get its (ownership,
ae23c9
file mode, SELinux) permissions changed (RHBZ 1584982) by mistake behind
ae23c9
QEMU. Specifically, an image in use by Libvirt VM has:
ae23c9
ae23c9
    $ ls -lhZ b.img
ae23c9
    -rw-r--r--. qemu qemu system_u:object_r:svirt_image_t:s0:c600,c690 b.img
ae23c9
ae23c9
Trying to attach it a second time won't work because of image locking.
ae23c9
And after the error, it becomes:
ae23c9
ae23c9
    $ ls -lhZ b.img
ae23c9
    -rw-r--r--. root root system_u:object_r:virt_image_t:s0 b.img
ae23c9
ae23c9
Then, we won't be able to do OFD lock operations with the existing fd.
ae23c9
In other words, the code such as in blk_detach_dev:
ae23c9
ae23c9
    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
ae23c9
ae23c9
can abort() QEMU, out of environmental changes.
ae23c9
ae23c9
This patch is an easy fix to this and the change is regardlessly
ae23c9
reasonable, so do it.
ae23c9
ae23c9
Signed-off-by: Fam Zheng <famz@redhat.com>
ae23c9
Reviewed-by: Max Reitz <mreitz@redhat.com>
ae23c9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
ae23c9
(cherry picked from commit 2996ffad3acabe890fbb4f84a069cdc325a68108)
ae23c9
Signed-off-by: Max Reitz <mreitz@redhat.com>
ae23c9
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
ae23c9
---
ae23c9
 block/file-posix.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----------
ae23c9
 1 file changed, 44 insertions(+), 10 deletions(-)
ae23c9
ae23c9
diff --git a/block/file-posix.c b/block/file-posix.c
ae23c9
index c2403ba..2a05193 100644
ae23c9
--- a/block/file-posix.c
ae23c9
+++ b/block/file-posix.c
ae23c9
@@ -152,6 +152,11 @@ typedef struct BDRVRawState {
ae23c9
     uint64_t perm;
ae23c9
     uint64_t shared_perm;
ae23c9
 
ae23c9
+    /* The perms bits whose corresponding bytes are already locked in
ae23c9
+     * s->lock_fd. */
ae23c9
+    uint64_t locked_perm;
ae23c9
+    uint64_t locked_shared_perm;
ae23c9
+
ae23c9
 #ifdef CONFIG_XFS
ae23c9
     bool is_xfs:1;
ae23c9
 #endif
ae23c9
@@ -677,43 +682,72 @@ typedef enum {
ae23c9
  * file; if @unlock == true, also unlock the unneeded bytes.
ae23c9
  * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
ae23c9
  */
ae23c9
-static int raw_apply_lock_bytes(int fd,
ae23c9
+static int raw_apply_lock_bytes(BDRVRawState *s, int fd,
ae23c9
                                 uint64_t perm_lock_bits,
ae23c9
                                 uint64_t shared_perm_lock_bits,
ae23c9
                                 bool unlock, Error **errp)
ae23c9
 {
ae23c9
     int ret;
ae23c9
     int i;
ae23c9
+    uint64_t locked_perm, locked_shared_perm;
ae23c9
+
ae23c9
+    if (s) {
ae23c9
+        locked_perm = s->locked_perm;
ae23c9
+        locked_shared_perm = s->locked_shared_perm;
ae23c9
+    } else {
ae23c9
+        /*
ae23c9
+         * We don't have the previous bits, just lock/unlock for each of the
ae23c9
+         * requested bits.
ae23c9
+         */
ae23c9
+        if (unlock) {
ae23c9
+            locked_perm = BLK_PERM_ALL;
ae23c9
+            locked_shared_perm = BLK_PERM_ALL;
ae23c9
+        } else {
ae23c9
+            locked_perm = 0;
ae23c9
+            locked_shared_perm = 0;
ae23c9
+        }
ae23c9
+    }
ae23c9
 
ae23c9
     PERM_FOREACH(i) {
ae23c9
         int off = RAW_LOCK_PERM_BASE + i;
ae23c9
-        if (perm_lock_bits & (1ULL << i)) {
ae23c9
+        uint64_t bit = (1ULL << i);
ae23c9
+        if ((perm_lock_bits & bit) && !(locked_perm & bit)) {
ae23c9
             ret = qemu_lock_fd(fd, off, 1, false);
ae23c9
             if (ret) {
ae23c9
                 error_setg(errp, "Failed to lock byte %d", off);
ae23c9
                 return ret;
ae23c9
+            } else if (s) {
ae23c9
+                s->locked_perm |= bit;
ae23c9
             }
ae23c9
-        } else if (unlock) {
ae23c9
+        } else if (unlock && (locked_perm & bit) && !(perm_lock_bits & bit)) {
ae23c9
             ret = qemu_unlock_fd(fd, off, 1);
ae23c9
             if (ret) {
ae23c9
                 error_setg(errp, "Failed to unlock byte %d", off);
ae23c9
                 return ret;
ae23c9
+            } else if (s) {
ae23c9
+                s->locked_perm &= ~bit;
ae23c9
             }
ae23c9
         }
ae23c9
     }
ae23c9
     PERM_FOREACH(i) {
ae23c9
         int off = RAW_LOCK_SHARED_BASE + i;
ae23c9
-        if (shared_perm_lock_bits & (1ULL << i)) {
ae23c9
+        uint64_t bit = (1ULL << i);
ae23c9
+        if ((shared_perm_lock_bits & bit) && !(locked_shared_perm & bit)) {
ae23c9
             ret = qemu_lock_fd(fd, off, 1, false);
ae23c9
             if (ret) {
ae23c9
                 error_setg(errp, "Failed to lock byte %d", off);
ae23c9
                 return ret;
ae23c9
+            } else if (s) {
ae23c9
+                s->locked_shared_perm |= bit;
ae23c9
             }
ae23c9
-        } else if (unlock) {
ae23c9
+        } else if (unlock && (locked_shared_perm & bit) &&
ae23c9
+                   !(shared_perm_lock_bits & bit)) {
ae23c9
             ret = qemu_unlock_fd(fd, off, 1);
ae23c9
             if (ret) {
ae23c9
                 error_setg(errp, "Failed to unlock byte %d", off);
ae23c9
                 return ret;
ae23c9
+            } else if (s) {
ae23c9
+                s->locked_shared_perm &= ~bit;
ae23c9
             }
ae23c9
         }
ae23c9
     }
ae23c9
@@ -781,7 +815,7 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
ae23c9
 
ae23c9
     switch (op) {
ae23c9
     case RAW_PL_PREPARE:
ae23c9
-        ret = raw_apply_lock_bytes(s->lock_fd, s->perm | new_perm,
ae23c9
+        ret = raw_apply_lock_bytes(s, s->lock_fd, s->perm | new_perm,
ae23c9
                                    ~s->shared_perm | ~new_shared,
ae23c9
                                    false, errp);
ae23c9
         if (!ret) {
ae23c9
@@ -796,7 +830,7 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
ae23c9
         op = RAW_PL_ABORT;
ae23c9
         /* fall through to unlock bytes. */
ae23c9
     case RAW_PL_ABORT:
ae23c9
-        raw_apply_lock_bytes(s->lock_fd, s->perm, ~s->shared_perm,
ae23c9
+        raw_apply_lock_bytes(s, s->lock_fd, s->perm, ~s->shared_perm,
ae23c9
                              true, &local_err);
ae23c9
         if (local_err) {
ae23c9
             /* Theoretically the above call only unlocks bytes and it cannot
ae23c9
@@ -806,7 +840,7 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
ae23c9
         }
ae23c9
         break;
ae23c9
     case RAW_PL_COMMIT:
ae23c9
-        raw_apply_lock_bytes(s->lock_fd, new_perm, ~new_shared,
ae23c9
+        raw_apply_lock_bytes(s, s->lock_fd, new_perm, ~new_shared,
ae23c9
                              true, &local_err);
ae23c9
         if (local_err) {
ae23c9
             /* Theoretically the above call only unlocks bytes and it cannot
ae23c9
@@ -2160,7 +2194,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
ae23c9
     shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
ae23c9
 
ae23c9
     /* Step one: Take locks */
ae23c9
-    result = raw_apply_lock_bytes(fd, perm, ~shared, false, errp);
ae23c9
+    result = raw_apply_lock_bytes(NULL, fd, perm, ~shared, false, errp);
ae23c9
     if (result < 0) {
ae23c9
         goto out_close;
ae23c9
     }
ae23c9
@@ -2204,7 +2238,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
ae23c9
     }
ae23c9
 
ae23c9
 out_unlock:
ae23c9
-    raw_apply_lock_bytes(fd, 0, 0, true, &local_err);
ae23c9
+    raw_apply_lock_bytes(NULL, fd, 0, 0, true, &local_err);
ae23c9
     if (local_err) {
ae23c9
         /* The above call should not fail, and if it does, that does
ae23c9
          * not mean the whole creation operation has failed.  So
ae23c9
-- 
ae23c9
1.8.3.1
ae23c9