Pablo Greco e6a3ae
From b9aaae95df14d93aab128376c40943259a453730 Mon Sep 17 00:00:00 2001
Pablo Greco e6a3ae
From: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Date: Wed, 3 Apr 2019 17:13:10 +0100
Pablo Greco e6a3ae
Subject: [PATCH 03/11] file-posix: Skip effectiveless OFD lock operations
Pablo Greco e6a3ae
Pablo Greco e6a3ae
RH-Author: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Message-id: <20190403171315.20841-4-mreitz@redhat.com>
Pablo Greco e6a3ae
Patchwork-id: 85401
Pablo Greco e6a3ae
O-Subject: [RHEL-8.1 qemu-kvm PATCH 3/8] file-posix: Skip effectiveless OFD lock operations
Pablo Greco e6a3ae
Bugzilla: 1694148
Pablo Greco e6a3ae
RH-Acked-by: John Snow <jsnow@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Pablo Greco e6a3ae
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
From: Fam Zheng <famz@redhat.com>
Pablo Greco e6a3ae
Pablo Greco e6a3ae
If we know we've already locked the bytes, don't do it again; similarly
Pablo Greco e6a3ae
don't unlock a byte if we haven't locked it. This doesn't change the
Pablo Greco e6a3ae
behavior, but fixes a corner case explained below.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Libvirt had an error handling bug that an image can get its (ownership,
Pablo Greco e6a3ae
file mode, SELinux) permissions changed (RHBZ 1584982) by mistake behind
Pablo Greco e6a3ae
QEMU. Specifically, an image in use by Libvirt VM has:
Pablo Greco e6a3ae
Pablo Greco e6a3ae
    $ ls -lhZ b.img
Pablo Greco e6a3ae
    -rw-r--r--. qemu qemu system_u:object_r:svirt_image_t:s0:c600,c690 b.img
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Trying to attach it a second time won't work because of image locking.
Pablo Greco e6a3ae
And after the error, it becomes:
Pablo Greco e6a3ae
Pablo Greco e6a3ae
    $ ls -lhZ b.img
Pablo Greco e6a3ae
    -rw-r--r--. root root system_u:object_r:virt_image_t:s0 b.img
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Then, we won't be able to do OFD lock operations with the existing fd.
Pablo Greco e6a3ae
In other words, the code such as in blk_detach_dev:
Pablo Greco e6a3ae
Pablo Greco e6a3ae
    blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
Pablo Greco e6a3ae
Pablo Greco e6a3ae
can abort() QEMU, out of environmental changes.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
This patch is an easy fix to this and the change is regardlessly
Pablo Greco e6a3ae
reasonable, so do it.
Pablo Greco e6a3ae
Pablo Greco e6a3ae
Signed-off-by: Fam Zheng <famz@redhat.com>
Pablo Greco e6a3ae
Reviewed-by: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Pablo Greco e6a3ae
(cherry picked from commit 2996ffad3acabe890fbb4f84a069cdc325a68108)
Pablo Greco e6a3ae
Signed-off-by: Max Reitz <mreitz@redhat.com>
Pablo Greco e6a3ae
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
Pablo Greco e6a3ae
---
Pablo Greco e6a3ae
 block/file-posix.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----------
Pablo Greco e6a3ae
 1 file changed, 44 insertions(+), 10 deletions(-)
Pablo Greco e6a3ae
Pablo Greco e6a3ae
diff --git a/block/file-posix.c b/block/file-posix.c
Pablo Greco e6a3ae
index c2403ba..2a05193 100644
Pablo Greco e6a3ae
--- a/block/file-posix.c
Pablo Greco e6a3ae
+++ b/block/file-posix.c
Pablo Greco e6a3ae
@@ -152,6 +152,11 @@ typedef struct BDRVRawState {
Pablo Greco e6a3ae
     uint64_t perm;
Pablo Greco e6a3ae
     uint64_t shared_perm;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
+    /* The perms bits whose corresponding bytes are already locked in
Pablo Greco e6a3ae
+     * s->lock_fd. */
Pablo Greco e6a3ae
+    uint64_t locked_perm;
Pablo Greco e6a3ae
+    uint64_t locked_shared_perm;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
 #ifdef CONFIG_XFS
Pablo Greco e6a3ae
     bool is_xfs:1;
Pablo Greco e6a3ae
 #endif
Pablo Greco e6a3ae
@@ -677,43 +682,72 @@ typedef enum {
Pablo Greco e6a3ae
  * file; if @unlock == true, also unlock the unneeded bytes.
Pablo Greco e6a3ae
  * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
Pablo Greco e6a3ae
  */
Pablo Greco e6a3ae
-static int raw_apply_lock_bytes(int fd,
Pablo Greco e6a3ae
+static int raw_apply_lock_bytes(BDRVRawState *s, int fd,
Pablo Greco e6a3ae
                                 uint64_t perm_lock_bits,
Pablo Greco e6a3ae
                                 uint64_t shared_perm_lock_bits,
Pablo Greco e6a3ae
                                 bool unlock, Error **errp)
Pablo Greco e6a3ae
 {
Pablo Greco e6a3ae
     int ret;
Pablo Greco e6a3ae
     int i;
Pablo Greco e6a3ae
+    uint64_t locked_perm, locked_shared_perm;
Pablo Greco e6a3ae
+
Pablo Greco e6a3ae
+    if (s) {
Pablo Greco e6a3ae
+        locked_perm = s->locked_perm;
Pablo Greco e6a3ae
+        locked_shared_perm = s->locked_shared_perm;
Pablo Greco e6a3ae
+    } else {
Pablo Greco e6a3ae
+        /*
Pablo Greco e6a3ae
+         * We don't have the previous bits, just lock/unlock for each of the
Pablo Greco e6a3ae
+         * requested bits.
Pablo Greco e6a3ae
+         */
Pablo Greco e6a3ae
+        if (unlock) {
Pablo Greco e6a3ae
+            locked_perm = BLK_PERM_ALL;
Pablo Greco e6a3ae
+            locked_shared_perm = BLK_PERM_ALL;
Pablo Greco e6a3ae
+        } else {
Pablo Greco e6a3ae
+            locked_perm = 0;
Pablo Greco e6a3ae
+            locked_shared_perm = 0;
Pablo Greco e6a3ae
+        }
Pablo Greco e6a3ae
+    }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     PERM_FOREACH(i) {
Pablo Greco e6a3ae
         int off = RAW_LOCK_PERM_BASE + i;
Pablo Greco e6a3ae
-        if (perm_lock_bits & (1ULL << i)) {
Pablo Greco e6a3ae
+        uint64_t bit = (1ULL << i);
Pablo Greco e6a3ae
+        if ((perm_lock_bits & bit) && !(locked_perm & bit)) {
Pablo Greco e6a3ae
             ret = qemu_lock_fd(fd, off, 1, false);
Pablo Greco e6a3ae
             if (ret) {
Pablo Greco e6a3ae
                 error_setg(errp, "Failed to lock byte %d", off);
Pablo Greco e6a3ae
                 return ret;
Pablo Greco e6a3ae
+            } else if (s) {
Pablo Greco e6a3ae
+                s->locked_perm |= bit;
Pablo Greco e6a3ae
             }
Pablo Greco e6a3ae
-        } else if (unlock) {
Pablo Greco e6a3ae
+        } else if (unlock && (locked_perm & bit) && !(perm_lock_bits & bit)) {
Pablo Greco e6a3ae
             ret = qemu_unlock_fd(fd, off, 1);
Pablo Greco e6a3ae
             if (ret) {
Pablo Greco e6a3ae
                 error_setg(errp, "Failed to unlock byte %d", off);
Pablo Greco e6a3ae
                 return ret;
Pablo Greco e6a3ae
+            } else if (s) {
Pablo Greco e6a3ae
+                s->locked_perm &= ~bit;
Pablo Greco e6a3ae
             }
Pablo Greco e6a3ae
         }
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
     PERM_FOREACH(i) {
Pablo Greco e6a3ae
         int off = RAW_LOCK_SHARED_BASE + i;
Pablo Greco e6a3ae
-        if (shared_perm_lock_bits & (1ULL << i)) {
Pablo Greco e6a3ae
+        uint64_t bit = (1ULL << i);
Pablo Greco e6a3ae
+        if ((shared_perm_lock_bits & bit) && !(locked_shared_perm & bit)) {
Pablo Greco e6a3ae
             ret = qemu_lock_fd(fd, off, 1, false);
Pablo Greco e6a3ae
             if (ret) {
Pablo Greco e6a3ae
                 error_setg(errp, "Failed to lock byte %d", off);
Pablo Greco e6a3ae
                 return ret;
Pablo Greco e6a3ae
+            } else if (s) {
Pablo Greco e6a3ae
+                s->locked_shared_perm |= bit;
Pablo Greco e6a3ae
             }
Pablo Greco e6a3ae
-        } else if (unlock) {
Pablo Greco e6a3ae
+        } else if (unlock && (locked_shared_perm & bit) &&
Pablo Greco e6a3ae
+                   !(shared_perm_lock_bits & bit)) {
Pablo Greco e6a3ae
             ret = qemu_unlock_fd(fd, off, 1);
Pablo Greco e6a3ae
             if (ret) {
Pablo Greco e6a3ae
                 error_setg(errp, "Failed to unlock byte %d", off);
Pablo Greco e6a3ae
                 return ret;
Pablo Greco e6a3ae
+            } else if (s) {
Pablo Greco e6a3ae
+                s->locked_shared_perm &= ~bit;
Pablo Greco e6a3ae
             }
Pablo Greco e6a3ae
         }
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
@@ -781,7 +815,7 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     switch (op) {
Pablo Greco e6a3ae
     case RAW_PL_PREPARE:
Pablo Greco e6a3ae
-        ret = raw_apply_lock_bytes(s->lock_fd, s->perm | new_perm,
Pablo Greco e6a3ae
+        ret = raw_apply_lock_bytes(s, s->lock_fd, s->perm | new_perm,
Pablo Greco e6a3ae
                                    ~s->shared_perm | ~new_shared,
Pablo Greco e6a3ae
                                    false, errp);
Pablo Greco e6a3ae
         if (!ret) {
Pablo Greco e6a3ae
@@ -796,7 +830,7 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
Pablo Greco e6a3ae
         op = RAW_PL_ABORT;
Pablo Greco e6a3ae
         /* fall through to unlock bytes. */
Pablo Greco e6a3ae
     case RAW_PL_ABORT:
Pablo Greco e6a3ae
-        raw_apply_lock_bytes(s->lock_fd, s->perm, ~s->shared_perm,
Pablo Greco e6a3ae
+        raw_apply_lock_bytes(s, s->lock_fd, s->perm, ~s->shared_perm,
Pablo Greco e6a3ae
                              true, &local_err);
Pablo Greco e6a3ae
         if (local_err) {
Pablo Greco e6a3ae
             /* Theoretically the above call only unlocks bytes and it cannot
Pablo Greco e6a3ae
@@ -806,7 +840,7 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
Pablo Greco e6a3ae
         }
Pablo Greco e6a3ae
         break;
Pablo Greco e6a3ae
     case RAW_PL_COMMIT:
Pablo Greco e6a3ae
-        raw_apply_lock_bytes(s->lock_fd, new_perm, ~new_shared,
Pablo Greco e6a3ae
+        raw_apply_lock_bytes(s, s->lock_fd, new_perm, ~new_shared,
Pablo Greco e6a3ae
                              true, &local_err);
Pablo Greco e6a3ae
         if (local_err) {
Pablo Greco e6a3ae
             /* Theoretically the above call only unlocks bytes and it cannot
Pablo Greco e6a3ae
@@ -2160,7 +2194,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
Pablo Greco e6a3ae
     shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
     /* Step one: Take locks */
Pablo Greco e6a3ae
-    result = raw_apply_lock_bytes(fd, perm, ~shared, false, errp);
Pablo Greco e6a3ae
+    result = raw_apply_lock_bytes(NULL, fd, perm, ~shared, false, errp);
Pablo Greco e6a3ae
     if (result < 0) {
Pablo Greco e6a3ae
         goto out_close;
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
@@ -2204,7 +2238,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
Pablo Greco e6a3ae
     }
Pablo Greco e6a3ae
 
Pablo Greco e6a3ae
 out_unlock:
Pablo Greco e6a3ae
-    raw_apply_lock_bytes(fd, 0, 0, true, &local_err);
Pablo Greco e6a3ae
+    raw_apply_lock_bytes(NULL, fd, 0, 0, true, &local_err);
Pablo Greco e6a3ae
     if (local_err) {
Pablo Greco e6a3ae
         /* The above call should not fail, and if it does, that does
Pablo Greco e6a3ae
          * not mean the whole creation operation has failed.  So
Pablo Greco e6a3ae
-- 
Pablo Greco e6a3ae
1.8.3.1
Pablo Greco e6a3ae