diff --git a/SOURCES/kvm-Drop-bogus-IPv6-messages.patch b/SOURCES/kvm-Drop-bogus-IPv6-messages.patch new file mode 100644 index 0000000..4c30a3b --- /dev/null +++ b/SOURCES/kvm-Drop-bogus-IPv6-messages.patch @@ -0,0 +1,51 @@ +From 89c4300c97739aa3291f0322037bb65068e08d41 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 19 Jan 2021 23:34:33 -0500 +Subject: [PATCH] Drop bogus IPv6 messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20210119233433.1352902-2-jmaloy@redhat.com> +Patchwork-id: 100695 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] Drop bogus IPv6 messages +Bugzilla: 1918054 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: Ralf Haferkamp + +Drop IPv6 messages shorter than what's mentioned in the payload +length header (+ the size of the IPv6 header). They're invalid and could +lead to data leakage in icmp6_send_echoreply(). + +(cherry picked from libslirp commit c7ede54cbd2e2b25385325600958ba0124e31cc0) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip6_input.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +index d9d2b7e9cd4..0f2b17853ad 100644 +--- a/slirp/src/ip6_input.c ++++ b/slirp/src/ip6_input.c +@@ -49,6 +49,13 @@ void ip6_input(struct mbuf *m) + goto bad; + } + ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. 
If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); +-- +2.27.0 + diff --git a/SOURCES/kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch b/SOURCES/kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch new file mode 100644 index 0000000..1188911 --- /dev/null +++ b/SOURCES/kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch @@ -0,0 +1,77 @@ +From e191ab6358b656764374ff1b3c7224a744dc902a Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 26 Jan 2021 17:21:02 -0500 +Subject: [PATCH 7/9] block: Require aligned image size to avoid assertion + failure + +RH-Author: Kevin Wolf +Message-id: <20210126172103.136060-2-kwolf@redhat.com> +Patchwork-id: 100786 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/2] block: Require aligned image size to avoid assertion failure +Bugzilla: 1834281 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +Unaligned requests will automatically be aligned to bl.request_alignment +and we can't extend write requests to access space beyond the end of the +image without resizing the image, so if we have the WRITE permission, +but not the RESIZE one, it's required that the image size is aligned. + +Failing to meet this requirement could cause assertion failures like +this if RESIZE permissions weren't requested: + +qemu-img: block/io.c:1910: bdrv_co_write_req_prepare: Assertion `end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE' failed. + +This was e.g. triggered by qemu-img converting to a target image with 4k +request alignment when the image was only aligned to 512 bytes, but not +to 4k. + +Turn this into a graceful error in bdrv_check_perm() so that WRITE +without RESIZE can only be taken if the image size is aligned. 
If a user +holds both permissions and drops only RESIZE, the function will return +an error, but bdrv_child_try_set_perm() will ignore the failure silently +if permissions are only requested to be relaxed and just keep both +permissions while returning success. + +Signed-off-by: Kevin Wolf +Message-Id: <20200716142601.111237-2-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 9c60a5d1978e6dcf85c0e01b50e6f7f54ca09104) +Signed-off-by: Kevin Wolf +Signed-off-by: Jon Maloy +--- + block.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block.c b/block.c +index 57740d312e..e9579ddf84 100644 +--- a/block.c ++++ b/block.c +@@ -2009,6 +2009,22 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, + return -EPERM; + } + ++ /* ++ * Unaligned requests will automatically be aligned to bl.request_alignment ++ * and without RESIZE we can't extend requests to write to space beyond the ++ * end of the image, so it's required that the image size is aligned. 
++ */ ++ if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && ++ !(cumulative_perms & BLK_PERM_RESIZE)) ++ { ++ if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) { ++ error_setg(errp, "Cannot get 'write' permission without 'resize': " ++ "Image size is not a multiple of request " ++ "alignment"); ++ return -EPERM; ++ } ++ } ++ + /* Check this node */ + if (!drv) { + return 0; +-- +2.18.2 + diff --git a/SOURCES/kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch b/SOURCES/kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch new file mode 100644 index 0000000..fe8c49b --- /dev/null +++ b/SOURCES/kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch @@ -0,0 +1,100 @@ +From b9b77159567283628645943b5367d39b558e8faa Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 26 Jan 2021 20:07:59 -0500 +Subject: [PATCH 9/9] block/iscsi:fix heap-buffer-overflow in + iscsi_aio_ioctl_cb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20210126200759.245891-2-jmaloy@redhat.com> +Patchwork-id: 100787 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] block/iscsi:fix heap-buffer-overflow in iscsi_aio_ioctl_cb +Bugzilla: 1912974 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek + +From: Chen Qun + +There is an overflow: the source 'datain.data[2]' is 100 bytes, + but the 'ss' is 252 bytes. This may cause a security issue because + we can access a lot of unrelated memory data. + +The len for sbp copy data should take the minimum of mx_sb_len and + sb_len_wr, not the maximum. 
+ +If we use iscsi device for VM backend storage, ASAN show stack: + +READ of size 252 at 0xfffd149dcfc4 thread T0 + #0 0xaaad433d0d34 in __asan_memcpy (aarch64-softmmu/qemu-system-aarch64+0x2cb0d34) + #1 0xaaad45f9d6d0 in iscsi_aio_ioctl_cb /qemu/block/iscsi.c:996:9 + #2 0xfffd1af0e2dc (/usr/lib64/iscsi/libiscsi.so.8+0xe2dc) + #3 0xfffd1af0d174 (/usr/lib64/iscsi/libiscsi.so.8+0xd174) + #4 0xfffd1af19fac (/usr/lib64/iscsi/libiscsi.so.8+0x19fac) + #5 0xaaad45f9acc8 in iscsi_process_read /qemu/block/iscsi.c:403:5 + #6 0xaaad4623733c in aio_dispatch_handler /qemu/util/aio-posix.c:467:9 + #7 0xaaad4622f350 in aio_dispatch_handlers /qemu/util/aio-posix.c:510:20 + #8 0xaaad4622f350 in aio_dispatch /qemu/util/aio-posix.c:520 + #9 0xaaad46215944 in aio_ctx_dispatch /qemu/util/async.c:298:5 + #10 0xfffd1bed12f4 in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x512f4) + #11 0xaaad46227de0 in glib_pollfds_poll /qemu/util/main-loop.c:219:9 + #12 0xaaad46227de0 in os_host_main_loop_wait /qemu/util/main-loop.c:242 + #13 0xaaad46227de0 in main_loop_wait /qemu/util/main-loop.c:518 + #14 0xaaad43d9d60c in qemu_main_loop /qemu/softmmu/vl.c:1662:9 + #15 0xaaad4607a5b0 in main /qemu/softmmu/main.c:49:5 + #16 0xfffd1a460b9c in __libc_start_main (/lib64/libc.so.6+0x20b9c) + #17 0xaaad43320740 in _start (aarch64-softmmu/qemu-system-aarch64+0x2c00740) + +0xfffd149dcfc4 is located 0 bytes to the right of 100-byte region [0xfffd149dcf60,0xfffd149dcfc4) +allocated by thread T0 here: + #0 0xaaad433d1e70 in __interceptor_malloc (aarch64-softmmu/qemu-system-aarch64+0x2cb1e70) + #1 0xfffd1af0e254 (/usr/lib64/iscsi/libiscsi.so.8+0xe254) + #2 0xfffd1af0d174 (/usr/lib64/iscsi/libiscsi.so.8+0xd174) + #3 0xfffd1af19fac (/usr/lib64/iscsi/libiscsi.so.8+0x19fac) + #4 0xaaad45f9acc8 in iscsi_process_read /qemu/block/iscsi.c:403:5 + #5 0xaaad4623733c in aio_dispatch_handler /qemu/util/aio-posix.c:467:9 + #6 0xaaad4622f350 in aio_dispatch_handlers /qemu/util/aio-posix.c:510:20 + #7 0xaaad4622f350 in 
aio_dispatch /qemu/util/aio-posix.c:520 + #8 0xaaad46215944 in aio_ctx_dispatch /qemu/util/async.c:298:5 + #9 0xfffd1bed12f4 in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x512f4) + #10 0xaaad46227de0 in glib_pollfds_poll /qemu/util/main-loop.c:219:9 + #11 0xaaad46227de0 in os_host_main_loop_wait /qemu/util/main-loop.c:242 + #12 0xaaad46227de0 in main_loop_wait /qemu/util/main-loop.c:518 + #13 0xaaad43d9d60c in qemu_main_loop /qemu/softmmu/vl.c:1662:9 + #14 0xaaad4607a5b0 in main /qemu/softmmu/main.c:49:5 + #15 0xfffd1a460b9c in __libc_start_main (/lib64/libc.so.6+0x20b9c) + #16 0xaaad43320740 in _start (aarch64-softmmu/qemu-system-aarch64+0x2c00740) + +Reported-by: Euler Robot +Signed-off-by: Chen Qun +Reviewed-by: Stefan Hajnoczi +Message-id: 20200418062602.10776-1-kuhn.chenqun@huawei.com +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Peter Maydell + +(cherry picked from ff0507c239a246fd7215b31c5658fc6a3ee1e4c5) +Signed-off-by: Jon Maloy +Signed-off-by: Jon Maloy +--- + block/iscsi.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index 0bea2d3a93..06915655b3 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -991,8 +991,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, + acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE; + + acb->ioh->sb_len_wr = acb->task->datain.size - 2; +- ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ? 
+- acb->ioh->mx_sb_len : acb->ioh->sb_len_wr; ++ ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr); + memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss); + } + +-- +2.18.2 + diff --git a/SOURCES/kvm-error-Document-Error-API-usage-rules.patch b/SOURCES/kvm-error-Document-Error-API-usage-rules.patch new file mode 100644 index 0000000..fb9f1b0 --- /dev/null +++ b/SOURCES/kvm-error-Document-Error-API-usage-rules.patch @@ -0,0 +1,154 @@ +From b2ac3e491eb7f18a421e2b1132e527d484681767 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:09 -0500 +Subject: [PATCH 08/14] error: Document Error API usage rules +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-5-marcandre.lureau@redhat.com> +Patchwork-id: 100477 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 04/10] error: Document Error API usage rules +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Markus Armbruster + +This merely codifies existing practice, with one exception: the rule +advising against returning void, where existing practice is mixed. + +When the Error API was created, we adopted the (unwritten) rule to +return void when the function returns no useful value on success, +unlike GError, which recommends to return true on success and false on +error then. + +When a function returns a distinct error value, say false, a checked +call that passes the error up looks like + + if (!frobnicate(..., errp)) { + handle the error... + } + +When it returns void, we need + + Error *err = NULL; + + frobnicate(..., &err); + if (err) { + handle the error... + error_propagate(errp, err); + } + +Not only is this more verbose, it also creates an Error object even +when @errp is null, &error_abort or &error_fatal. 
+ +People got tired of the additional boilerplate, and started to ignore +the unwritten rule. The result is confusion among developers about +the preferred usage. + +Make the rule advising against returning void official by putting it +in writing. This will hopefully reduce confusion. + +Update the examples accordingly. + +The remainder of this series will update a substantial amount of code +to honor the rule. + +Signed-off-by: Markus Armbruster +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Greg Kurz +Message-Id: <20200707160613.848843-4-armbru@redhat.com> + +(cherry picked from commit e3fe3988d7851cac30abffae06d2f555ff7bee62) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qapi/error.h | 52 +++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 46 insertions(+), 6 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 3351fe76368..08d48e74836 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -15,6 +15,33 @@ + /* + * Error reporting system loosely patterned after Glib's GError. + * ++ * = Rules = ++ * ++ * - Functions that use Error to report errors have an Error **errp ++ * parameter. It should be the last parameter, except for functions ++ * taking variable arguments. ++ * ++ * - You may pass NULL to not receive the error, &error_abort to abort ++ * on error, &error_fatal to exit(1) on error, or a pointer to a ++ * variable containing NULL to receive the error. ++ * ++ * - Separation of concerns: the function is responsible for detecting ++ * errors and failing cleanly; handling the error is its caller's ++ * job. Since the value of @errp is about handling the error, the ++ * function should not examine it. ++ * ++ * - On success, the function should not touch *errp. On failure, it ++ * should set a new error, e.g. with error_setg(errp, ...), or ++ * propagate an existing one, e.g. with error_propagate(errp, ...). 
++ * ++ * - Whenever practical, also return a value that indicates success / ++ * failure. This can make the error checking more concise, and can ++ * avoid useless error object creation and destruction. Note that ++ * we still have many functions returning void. We recommend ++ * • bool-valued functions return true on success / false on failure, ++ * • pointer-valued functions return non-null / null pointer, and ++ * • integer-valued functions return non-negative / negative. ++ * + * = Creating errors = + * + * Create an error: +@@ -95,14 +122,13 @@ + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); + * +- * Call a function and receive an error from it: +- * Error *err = NULL; +- * foo(arg, &err); +- * if (err) { ++ * Call a function, receive an error from it, and pass it to the caller ++ * - when the function returns a value that indicates failure, say ++ * false: ++ * if (!foo(arg, errp)) { + * handle the error... + * } +- * +- * Receive an error and pass it on to the caller: ++ * - when it does not, say because it is a void function: + * Error *err = NULL; + * foo(arg, &err); + * if (err) { +@@ -120,6 +146,20 @@ + * foo(arg, errp); + * for readability. + * ++ * Receive an error, and handle it locally ++ * - when the function returns a value that indicates failure, say ++ * false: ++ * Error *err = NULL; ++ * if (!foo(arg, &err)) { ++ * handle the error... ++ * } ++ * - when it does not, say because it is a void function: ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * if (err) { ++ * handle the error... 
++ * } ++ * + * Receive and accumulate multiple errors (first one wins): + * Error *err = NULL, *local_err = NULL; + * foo(arg, &err); +-- +2.27.0 + diff --git a/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch b/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch new file mode 100644 index 0000000..ee14eb5 --- /dev/null +++ b/SOURCES/kvm-error-Fix-examples-in-error.h-s-big-comment.patch @@ -0,0 +1,85 @@ +From fe7dd779a9674dc54ffe296247ae6559f2b55b22 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:07 -0500 +Subject: [PATCH 06/14] error: Fix examples in error.h's big comment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-3-marcandre.lureau@redhat.com> +Patchwork-id: 100473 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 02/10] error: Fix examples in error.h's big comment +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Markus Armbruster + +Mark a bad example more clearly. Fix the error_propagate_prepend() +example. Add a missing declaration and a second error pileup example. + +Signed-off-by: Markus Armbruster +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Greg Kurz +Message-Id: <20200707160613.848843-2-armbru@redhat.com> + +(cherry picked from commit 47ff5ac81e8bb3096500de7b132051691d533d36) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + include/qapi/error.h | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 3f95141a01a..83c38f9a188 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -24,7 +24,7 @@ + * "charm, top, bottom.\n"); + * + * Do *not* contract this to +- * error_setg(&err, "invalid quark\n" ++ * error_setg(&err, "invalid quark\n" // WRONG! + * "Valid quarks are up, down, strange, charm, top, bottom."); + * + * Report an error to the current monitor if we have one, else stderr: +@@ -52,7 +52,8 @@ + * where Error **errp is a parameter, by convention the last one. + * + * Pass an existing error to the caller with the message modified: +- * error_propagate_prepend(errp, err); ++ * error_propagate_prepend(errp, err, ++ * "Could not frobnicate '%s': ", name); + * + * Avoid + * error_propagate(errp, err); +@@ -108,12 +109,23 @@ + * } + * + * Do *not* "optimize" this to ++ * Error *err = NULL; + * foo(arg, &err); + * bar(arg, &err); // WRONG! + * if (err) { + * handle the error... + * } + * because this may pass a non-null err to bar(). ++ * ++ * Likewise, do *not* ++ * Error *err = NULL; ++ * if (cond1) { ++ * error_setg(&err, ...); ++ * } ++ * if (cond2) { ++ * error_setg(&err, ...); // WRONG! ++ * } ++ * because this may pass a non-null err to error_setg(). 
+ */ + + #ifndef ERROR_H +-- +2.27.0 + diff --git a/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch b/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch new file mode 100644 index 0000000..0ad4367 --- /dev/null +++ b/SOURCES/kvm-error-Improve-error.h-s-big-comment.patch @@ -0,0 +1,146 @@ +From 439c11850165fd838e367aa6d4fff4af951a5bd9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:08 -0500 +Subject: [PATCH 07/14] error: Improve error.h's big comment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-4-marcandre.lureau@redhat.com> +Patchwork-id: 100474 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 03/10] error: Improve error.h's big comment +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Markus Armbruster + +Add headlines to the big comment. + +Explain examples for NULL, &error_abort and &error_fatal argument +better. + +Tweak rationale for error_propagate_prepend(). + +Signed-off-by: Markus Armbruster +Message-Id: <20200707160613.848843-3-armbru@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Greg Kurz + +(cherry picked from commit 9aac7d486cc792191c25c30851f501624b0c2751) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qapi/error.h | 51 +++++++++++++++++++++++++++++++------------- + 1 file changed, 36 insertions(+), 15 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 83c38f9a188..3351fe76368 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -15,6 +15,8 @@ + /* + * Error reporting system loosely patterned after Glib's GError. + * ++ * = Creating errors = ++ * + * Create an error: + * error_setg(&err, "situation normal, all fouled up"); + * +@@ -27,6 +29,8 @@ + * error_setg(&err, "invalid quark\n" // WRONG! 
+ * "Valid quarks are up, down, strange, charm, top, bottom."); + * ++ * = Reporting and destroying errors = ++ * + * Report an error to the current monitor if we have one, else stderr: + * error_report_err(err); + * This frees the error object. +@@ -40,6 +44,30 @@ + * error_free(err); + * Note that this loses hints added with error_append_hint(). + * ++ * Call a function ignoring errors: ++ * foo(arg, NULL); ++ * This is more concise than ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * error_free(err); // don't do this ++ * ++ * Call a function aborting on errors: ++ * foo(arg, &error_abort); ++ * This is more concise and fails more nicely than ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * assert(!err); // don't do this ++ * ++ * Call a function treating errors as fatal: ++ * foo(arg, &error_fatal); ++ * This is more concise than ++ * Error *err = NULL; ++ * foo(arg, &err); ++ * if (err) { // don't do this ++ * error_report_err(err); ++ * exit(1); ++ * } ++ * + * Handle an error without reporting it (just for completeness): + * error_free(err); + * +@@ -47,6 +75,11 @@ + * reporting it (primarily useful in testsuites): + * error_free_or_abort(&err); + * ++ * = Passing errors around = ++ * ++ * Errors get passed to the caller through the conventional @errp ++ * parameter. ++ * + * Pass an existing error to the caller: + * error_propagate(errp, err); + * where Error **errp is a parameter, by convention the last one. +@@ -54,11 +87,10 @@ + * Pass an existing error to the caller with the message modified: + * error_propagate_prepend(errp, err, + * "Could not frobnicate '%s': ", name); +- * +- * Avoid +- * error_propagate(errp, err); ++ * This is more concise than ++ * error_propagate(errp, err); // don't do this + * error_prepend(errp, "Could not frobnicate '%s': ", name); +- * because this fails to prepend when @errp is &error_fatal. ++ * and works even when @errp is &error_fatal. 
+ * + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); +@@ -70,15 +102,6 @@ + * handle the error... + * } + * +- * Call a function ignoring errors: +- * foo(arg, NULL); +- * +- * Call a function aborting on errors: +- * foo(arg, &error_abort); +- * +- * Call a function treating errors as fatal: +- * foo(arg, &error_fatal); +- * + * Receive an error and pass it on to the caller: + * Error *err = NULL; + * foo(arg, &err); +@@ -86,8 +109,6 @@ + * handle the error... + * error_propagate(errp, err); + * } +- * where Error **errp is a parameter, by convention the last one. +- * + * Do *not* "optimize" this to + * foo(arg, errp); + * if (*errp) { // WRONG! +-- +2.27.0 + diff --git a/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch b/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch new file mode 100644 index 0000000..d67ad7c --- /dev/null +++ b/SOURCES/kvm-error-New-macro-ERRP_GUARD.patch @@ -0,0 +1,305 @@ +From 46c3298774b976cc6a1cd834751e644fb482b08e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:10 -0500 +Subject: [PATCH 09/14] error: New macro ERRP_GUARD() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-6-marcandre.lureau@redhat.com> +Patchwork-id: 100476 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 05/10] error: New macro ERRP_GUARD() +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Vladimir Sementsov-Ogievskiy + +Introduce a new ERRP_GUARD() macro, to be used at start of functions +with an errp OUT parameter. + +It has three goals: + +1. Fix issue with error_fatal and error_prepend/error_append_hint: the +user can't see this additional information, because exit() happens in +error_setg earlier than information is added. [Reported by Greg Kurz] + +2. 
Fix issue with error_abort and error_propagate: when we wrap +error_abort by local_err+error_propagate, the resulting coredump will +refer to error_propagate and not to the place where error happened. +(the macro itself doesn't fix the issue, but it allows us to [3.] drop +the local_err+error_propagate pattern, which will definitely fix the +issue) [Reported by Kevin Wolf] + +3. Drop local_err+error_propagate pattern, which is used to work around +void functions with errp parameter, when caller wants to know resulting +status. (Note: actually these functions could be merely updated to +return int error code). + +To achieve these goals, later patches will add invocations +of this macro at the start of functions which either use +error_prepend/error_append_hint (solving 1) or which use +local_err+error_propagate to check errors, switching those +functions to use *errp instead (solving 2 and 3). + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Paul Durrant +Reviewed-by: Greg Kurz +Reviewed-by: Eric Blake +[Merge comments properly with recent commit "error: Document Error API +usage rules", and edit for clarity. Put ERRP_AUTO_PROPAGATE() before +its helpers, and touch up style. Tweak commit message.] +Signed-off-by: Markus Armbruster +Message-Id: <20200707165037.1026246-2-armbru@redhat.com> + +(cherry picked from commit ae7c80a7bd73685437bf6ba9d7c26098351f4166) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qapi/error.h | 158 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 139 insertions(+), 19 deletions(-) + +diff --git a/include/qapi/error.h b/include/qapi/error.h +index 08d48e74836..e658790acfc 100644 +--- a/include/qapi/error.h ++++ b/include/qapi/error.h +@@ -30,6 +30,10 @@ + * job. Since the value of @errp is about handling the error, the + * function should not examine it. + * ++ * - The function may pass @errp to functions it calls to pass on ++ * their errors to its caller. 
If it dereferences @errp to check ++ * for errors, it must use ERRP_GUARD(). ++ * + * - On success, the function should not touch *errp. On failure, it + * should set a new error, e.g. with error_setg(errp, ...), or + * propagate an existing one, e.g. with error_propagate(errp, ...). +@@ -45,15 +49,17 @@ + * = Creating errors = + * + * Create an error: +- * error_setg(&err, "situation normal, all fouled up"); ++ * error_setg(errp, "situation normal, all fouled up"); ++ * where @errp points to the location to receive the error. + * + * Create an error and add additional explanation: +- * error_setg(&err, "invalid quark"); +- * error_append_hint(&err, "Valid quarks are up, down, strange, " ++ * error_setg(errp, "invalid quark"); ++ * error_append_hint(errp, "Valid quarks are up, down, strange, " + * "charm, top, bottom.\n"); ++ * This may require use of ERRP_GUARD(); more on that below. + * + * Do *not* contract this to +- * error_setg(&err, "invalid quark\n" // WRONG! ++ * error_setg(errp, "invalid quark\n" // WRONG! + * "Valid quarks are up, down, strange, charm, top, bottom."); + * + * = Reporting and destroying errors = +@@ -107,18 +113,6 @@ + * Errors get passed to the caller through the conventional @errp + * parameter. + * +- * Pass an existing error to the caller: +- * error_propagate(errp, err); +- * where Error **errp is a parameter, by convention the last one. +- * +- * Pass an existing error to the caller with the message modified: +- * error_propagate_prepend(errp, err, +- * "Could not frobnicate '%s': ", name); +- * This is more concise than +- * error_propagate(errp, err); // don't do this +- * error_prepend(errp, "Could not frobnicate '%s': ", name); +- * and works even when @errp is &error_fatal. +- * + * Create a new error and pass it to the caller: + * error_setg(errp, "situation normal, all fouled up"); + * +@@ -129,18 +123,26 @@ + * handle the error... 
+ * } + * - when it does not, say because it is a void function: ++ * ERRP_GUARD(); ++ * foo(arg, errp); ++ * if (*errp) { ++ * handle the error... ++ * } ++ * More on ERRP_GUARD() below. ++ * ++ * Code predating ERRP_GUARD() still exists, and looks like this: + * Error *err = NULL; + * foo(arg, &err); + * if (err) { + * handle the error... +- * error_propagate(errp, err); ++ * error_propagate(errp, err); // deprecated + * } +- * Do *not* "optimize" this to ++ * Avoid in new code. Do *not* "optimize" it to + * foo(arg, errp); + * if (*errp) { // WRONG! + * handle the error... + * } +- * because errp may be NULL! ++ * because errp may be NULL without the ERRP_GUARD() guard. + * + * But when all you do with the error is pass it on, please use + * foo(arg, errp); +@@ -160,6 +162,19 @@ + * handle the error... + * } + * ++ * Pass an existing error to the caller: ++ * error_propagate(errp, err); ++ * This is rarely needed. When @err is a local variable, use of ++ * ERRP_GUARD() commonly results in more readable code. ++ * ++ * Pass an existing error to the caller with the message modified: ++ * error_propagate_prepend(errp, err, ++ * "Could not frobnicate '%s': ", name); ++ * This is more concise than ++ * error_propagate(errp, err); // don't do this ++ * error_prepend(errp, "Could not frobnicate '%s': ", name); ++ * and works even when @errp is &error_fatal. ++ * + * Receive and accumulate multiple errors (first one wins): + * Error *err = NULL, *local_err = NULL; + * foo(arg, &err); +@@ -187,6 +202,69 @@ + * error_setg(&err, ...); // WRONG! + * } + * because this may pass a non-null err to error_setg(). ++ * ++ * = Why, when and how to use ERRP_GUARD() = ++ * ++ * Without ERRP_GUARD(), use of the @errp parameter is restricted: ++ * - It must not be dereferenced, because it may be null. ++ * - It should not be passed to error_prepend() or ++ * error_append_hint(), because that doesn't work with &error_fatal. ++ * ERRP_GUARD() lifts these restrictions. 
++ * ++ * To use ERRP_GUARD(), add it right at the beginning of the function. ++ * @errp can then be used without worrying about the argument being ++ * NULL or &error_fatal. ++ * ++ * Using it when it's not needed is safe, but please avoid cluttering ++ * the source with useless code. ++ * ++ * = Converting to ERRP_GUARD() = ++ * ++ * To convert a function to use ERRP_GUARD(): ++ * ++ * 0. If the Error ** parameter is not named @errp, rename it to ++ * @errp. ++ * ++ * 1. Add an ERRP_GUARD() invocation, by convention right at the ++ * beginning of the function. This makes @errp safe to use. ++ * ++ * 2. Replace &err by errp, and err by *errp. Delete local variable ++ * @err. ++ * ++ * 3. Delete error_propagate(errp, *errp), replace ++ * error_propagate_prepend(errp, *errp, ...) by error_prepend(errp, ...) ++ * ++ * 4. Ensure @errp is valid at return: when you destroy *errp, set ++ * errp = NULL. ++ * ++ * Example: ++ * ++ * bool fn(..., Error **errp) ++ * { ++ * Error *err = NULL; ++ * ++ * foo(arg, &err); ++ * if (err) { ++ * handle the error... ++ * error_propagate(errp, err); ++ * return false; ++ * } ++ * ... ++ * } ++ * ++ * becomes ++ * ++ * bool fn(..., Error **errp) ++ * { ++ * ERRP_GUARD(); ++ * ++ * foo(arg, errp); ++ * if (*errp) { ++ * handle the error... ++ * return false; ++ * } ++ * ... ++ * } + */ + + #ifndef ERROR_H +@@ -287,6 +365,7 @@ void error_setg_win32_internal(Error **errp, + * the error object. + * Else, move the error object from @local_err to *@dst_errp. + * On return, @local_err is invalid. ++ * Please use ERRP_GUARD() instead when possible. + * Please don't error_propagate(&error_fatal, ...), use + * error_report_err() and exit(), because that's more obvious. + */ +@@ -298,6 +377,7 @@ void error_propagate(Error **dst_errp, Error *local_err); + * Behaves like + * error_prepend(&local_err, fmt, ...); + * error_propagate(dst_errp, local_err); ++ * Please use ERRP_GUARD() and error_prepend() instead when possible. 
+ */ + void error_propagate_prepend(Error **dst_errp, Error *local_err, + const char *fmt, ...); +@@ -395,6 +475,46 @@ void error_set_internal(Error **errp, + ErrorClass err_class, const char *fmt, ...) + GCC_FMT_ATTR(6, 7); + ++/* ++ * Make @errp parameter easier to use regardless of argument value ++ * ++ * This macro is for use right at the beginning of a function that ++ * takes an Error **errp parameter to pass errors to its caller. The ++ * parameter must be named @errp. ++ * ++ * It must be used when the function dereferences @errp or passes ++ * @errp to error_prepend(), error_vprepend(), or error_append_hint(). ++ * It is safe to use even when it's not needed, but please avoid ++ * cluttering the source with useless code. ++ * ++ * If @errp is NULL or &error_fatal, rewrite it to point to a local ++ * Error variable, which will be automatically propagated to the ++ * original @errp on function exit. ++ * ++ * Note: &error_abort is not rewritten, because that would move the ++ * abort from the place where the error is created to the place where ++ * it's propagated. ++ */ ++#define ERRP_GUARD() \ ++ g_auto(ErrorPropagator) _auto_errp_prop = {.errp = errp}; \ ++ do { \ ++ if (!errp || errp == &error_fatal) { \ ++ errp = &_auto_errp_prop.local_err; \ ++ } \ ++ } while (0) ++ ++typedef struct ErrorPropagator { ++ Error *local_err; ++ Error **errp; ++} ErrorPropagator; ++ ++static inline void error_propagator_cleanup(ErrorPropagator *prop) ++{ ++ error_propagate(prop->errp, prop->local_err); ++} ++ ++G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(ErrorPropagator, error_propagator_cleanup); ++ + /* + * Special error destination to abort on error. + * See error_setg() and error_propagate() for details. 
+-- +2.27.0 + diff --git a/SOURCES/kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch b/SOURCES/kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch new file mode 100644 index 0000000..aa47108 --- /dev/null +++ b/SOURCES/kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch @@ -0,0 +1,96 @@ +From 4e553943c8fe4924d194884b4719c5459210c686 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 26 Jan 2021 17:21:03 -0500 +Subject: [PATCH 8/9] file-posix: Allow byte-aligned O_DIRECT with NFS + +RH-Author: Kevin Wolf +Message-id: <20210126172103.136060-3-kwolf@redhat.com> +Patchwork-id: 100785 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/2] file-posix: Allow byte-aligned O_DIRECT with NFS +Bugzilla: 1834281 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +Since commit a6b257a08e3 ('file-posix: Handle undetectable alignment'), +we assume that if we open a file with O_DIRECT and alignment probing +returns 1, we just couldn't find out the real alignment requirement +because some filesystems make the requirement only for allocated blocks. +In this case, a safe default of 4k is used. + +This is too strict for NFS, which does actually allow byte-aligned +requests even with O_DIRECT. Because we can't distinguish both cases +with generic code, let's just look at the file system magic and disable +s->needs_alignment for NFS. This way, O_DIRECT can still be used on NFS +for images that are not aligned to 4k. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Message-Id: <20200716142601.111237-3-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 5edc85571e7b7269dce408735eba7507f18ac666) +Signed-off-by: Kevin Wolf +Signed-off-by: Jon Maloy +--- + block/file-posix.c | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index adafbfa1be..2d834fbdf6 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -61,10 +61,12 @@ + #include + #include + #include ++#include + #include + #include + #include + #include ++#include + #include + #ifdef __s390__ + #include +@@ -298,6 +300,28 @@ static int probe_physical_blocksize(int fd, unsigned int *blk_size) + #endif + } + ++/* ++ * Returns true if no alignment restrictions are necessary even for files ++ * opened with O_DIRECT. ++ * ++ * raw_probe_alignment() probes the required alignment and assume that 1 means ++ * the probing failed, so it falls back to a safe default of 4k. This can be ++ * avoided if we know that byte alignment is okay for the file. ++ */ ++static bool dio_byte_aligned(int fd) ++{ ++#ifdef __linux__ ++ struct statfs buf; ++ int ret; ++ ++ ret = fstatfs(fd, &buf); ++ if (ret == 0 && buf.f_type == NFS_SUPER_MAGIC) { ++ return true; ++ } ++#endif ++ return false; ++} ++ + /* Check if read is allowed with given memory buffer and length. + * + * This function is used to check O_DIRECT memory buffer and request alignment. 
+@@ -602,7 +626,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + + s->has_discard = true; + s->has_write_zeroes = true; +- if ((bs->open_flags & BDRV_O_NOCACHE) != 0) { ++ if ((bs->open_flags & BDRV_O_NOCACHE) != 0 && !dio_byte_aligned(s->fd)) { + s->needs_alignment = true; + } + +-- +2.18.2 + diff --git a/SOURCES/kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch b/SOURCES/kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch new file mode 100644 index 0000000..75788c5 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch @@ -0,0 +1,222 @@ +From 602f17920e422e2b8d3ce485e56066a97b74e723 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:29 -0500 +Subject: [PATCH 05/17] hw/arm/smmu: Introduce SMMUTLBEntry for PTW and IOTLB + value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-5-eperezma@redhat.com> +Patchwork-id: 100597 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 04/13] hw/arm/smmu: Introduce SMMUTLBEntry for PTW and IOTLB value +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +Introduce a specialized SMMUTLBEntry to store the result of +the PTW and cache in the IOTLB. This structure extends the +generic IOMMUTLBEntry struct with the level of the entry and +the granule size. + +Those latter will be useful when implementing range invalidation. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-5-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit a7550158556b7fc2f2baaecf9092499c6687b160) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/smmu-common.c | 32 +++++++++++++++++--------------- + hw/arm/smmuv3.c | 10 +++++----- + include/hw/arm/smmu-common.h | 12 +++++++++--- + 3 files changed, 31 insertions(+), 23 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 0b89c9fbbbc..06e9e38b007 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -64,11 +64,11 @@ SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova) + return key; + } + +-IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, +- hwaddr iova) ++SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, ++ hwaddr iova) + { + SMMUIOTLBKey key = smmu_get_iotlb_key(cfg->asid, iova); +- IOMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); ++ SMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); + + if (entry) { + cfg->iotlb_hits++; +@@ -86,7 +86,7 @@ IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + return entry; + } + +-void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) ++void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) + { + SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); + +@@ -94,9 +94,9 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) + smmu_iotlb_inv_all(bs); + } + +- *key = smmu_get_iotlb_key(cfg->asid, entry->iova); +- trace_smmu_iotlb_insert(cfg->asid, entry->iova); +- g_hash_table_insert(bs->iotlb, key, entry); ++ *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova); ++ trace_smmu_iotlb_insert(cfg->asid, new->entry.iova); ++ g_hash_table_insert(bs->iotlb, key, new); + } + + inline void smmu_iotlb_inv_all(SMMUState *s) +@@ -217,7 +217,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) + * @cfg: translation config + * @iova: iova to translate + * @perm: access type +- * @tlbe: IOMMUTLBEntry (out) ++ * @tlbe: SMMUTLBEntry (out) + * @info: handle to an error info + * + * Return 0 on success, < 0 on error. 
In case of error, @info is filled +@@ -227,7 +227,7 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) + */ + static int smmu_ptw_64(SMMUTransCfg *cfg, + dma_addr_t iova, IOMMUAccessFlags perm, +- IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) ++ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) + { + dma_addr_t baseaddr, indexmask; + int stage = cfg->stage; +@@ -247,8 +247,8 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, + baseaddr = extract64(tt->ttb, 0, 48); + baseaddr &= ~indexmask; + +- tlbe->iova = iova; +- tlbe->addr_mask = (1 << granule_sz) - 1; ++ tlbe->entry.iova = iova; ++ tlbe->entry.addr_mask = (1 << granule_sz) - 1; + + while (level <= 3) { + uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); +@@ -299,14 +299,16 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, + goto error; + } + +- tlbe->translated_addr = gpa + (iova & mask); +- tlbe->perm = PTE_AP_TO_PERM(ap); ++ tlbe->entry.translated_addr = gpa + (iova & mask); ++ tlbe->entry.perm = PTE_AP_TO_PERM(ap); ++ tlbe->level = level; ++ tlbe->granule = granule_sz; + return 0; + } + info->type = SMMU_PTW_ERR_TRANSLATION; + + error: +- tlbe->perm = IOMMU_NONE; ++ tlbe->entry.perm = IOMMU_NONE; + return -EINVAL; + } + +@@ -322,7 +324,7 @@ error: + * return 0 on success + */ + inline int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, +- IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) ++ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) + { + if (!cfg->aa64) { + /* +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 34dea4df4da..ad8212779d3 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -614,7 +614,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + SMMUTranslationStatus status; + SMMUState *bs = ARM_SMMU(s); + uint64_t page_mask, aligned_addr; +- IOMMUTLBEntry *cached_entry = NULL; ++ SMMUTLBEntry *cached_entry = NULL; + SMMUTransTableInfo *tt; + SMMUTransCfg *cfg = NULL; + IOMMUTLBEntry entry = { +@@ -664,7 +664,7 @@ static IOMMUTLBEntry 
smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + + cached_entry = smmu_iotlb_lookup(bs, cfg, aligned_addr); + if (cached_entry) { +- if ((flag & IOMMU_WO) && !(cached_entry->perm & IOMMU_WO)) { ++ if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { + status = SMMU_TRANS_ERROR; + if (event.record_trans_faults) { + event.type = SMMU_EVT_F_PERMISSION; +@@ -677,7 +677,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + goto epilogue; + } + +- cached_entry = g_new0(IOMMUTLBEntry, 1); ++ cached_entry = g_new0(SMMUTLBEntry, 1); + + if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { + g_free(cached_entry); +@@ -731,9 +731,9 @@ epilogue: + switch (status) { + case SMMU_TRANS_SUCCESS: + entry.perm = flag; +- entry.translated_addr = cached_entry->translated_addr + ++ entry.translated_addr = cached_entry->entry.translated_addr + + (addr & page_mask); +- entry.addr_mask = cached_entry->addr_mask; ++ entry.addr_mask = cached_entry->entry.addr_mask; + trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, + entry.translated_addr, entry.perm); + break; +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index bceba40885c..277923bdc0a 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -52,6 +52,12 @@ typedef struct SMMUTransTableInfo { + uint8_t granule_sz; /* granule page shift */ + } SMMUTransTableInfo; + ++typedef struct SMMUTLBEntry { ++ IOMMUTLBEntry entry; ++ uint8_t level; ++ uint8_t granule; ++} SMMUTLBEntry; ++ + /* + * Generic structure populated by derived SMMU devices + * after decoding the configuration information and used as +@@ -140,7 +146,7 @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev) + * pair, according to @cfg translation config + */ + int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, +- IOMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); ++ SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); + + /** + * select_tt - 
compute which translation table shall be used according to +@@ -153,8 +159,8 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); + + #define SMMU_IOTLB_MAX_SIZE 256 + +-IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); +-void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry); ++SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); ++void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); + SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova); + void smmu_iotlb_inv_all(SMMUState *s); + void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch b/SOURCES/kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch new file mode 100644 index 0000000..6500b41 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch @@ -0,0 +1,166 @@ +From 7833c0bf8321cb39614ee889cf3e3a64511c0aa5 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:28 -0500 +Subject: [PATCH 04/17] hw/arm/smmu: Introduce smmu_get_iotlb_key() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-4-eperezma@redhat.com> +Patchwork-id: 100596 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 03/13] hw/arm/smmu: Introduce smmu_get_iotlb_key() +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +Introduce the smmu_get_iotlb_key() helper and the +SMMU_IOTLB_ASID() macro. Also move smmu_get_iotlb_key and +smmu_iotlb_key_hash in the IOTLB related code section. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-4-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 60a61f1b31fc03080aadb63c9b1006f8b1972adb) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/smmu-common.c | 66 ++++++++++++++++++++---------------- + hw/arm/smmu-internal.h | 1 + + include/hw/arm/smmu-common.h | 1 + + 3 files changed, 38 insertions(+), 30 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 8e01505dbee..0b89c9fbbbc 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -32,10 +32,42 @@ + + /* IOTLB Management */ + ++static guint smmu_iotlb_key_hash(gconstpointer v) ++{ ++ SMMUIOTLBKey *key = (SMMUIOTLBKey *)v; ++ uint32_t a, b, c; ++ ++ /* Jenkins hash */ ++ a = b = c = JHASH_INITVAL + sizeof(*key); ++ a += key->asid; ++ b += extract64(key->iova, 0, 32); ++ c += extract64(key->iova, 32, 32); ++ ++ __jhash_mix(a, b, c); ++ __jhash_final(a, b, c); ++ ++ return c; ++} ++ ++static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) ++{ ++ const SMMUIOTLBKey *k1 = v1; ++ const SMMUIOTLBKey *k2 = v2; ++ ++ return (k1->asid == k2->asid) && (k1->iova == k2->iova); ++} ++ ++SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova) ++{ ++ SMMUIOTLBKey key = {.asid = asid, .iova = iova}; ++ ++ return key; ++} ++ + IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + hwaddr iova) + { +- SMMUIOTLBKey key = {.asid = cfg->asid, .iova = iova}; ++ SMMUIOTLBKey key = smmu_get_iotlb_key(cfg->asid, iova); + IOMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); + + if (entry) { +@@ -62,8 +94,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) + smmu_iotlb_inv_all(bs); + } + +- key->asid = cfg->asid; +- key->iova = entry->iova; ++ *key = smmu_get_iotlb_key(cfg->asid, entry->iova); + trace_smmu_iotlb_insert(cfg->asid, entry->iova); + g_hash_table_insert(bs->iotlb, key, entry); + } +@@ -80,12 +111,12 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, + uint16_t asid = *(uint16_t *)user_data; + SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; + +- return iotlb_key->asid == asid; ++ return 
SMMU_IOTLB_ASID(*iotlb_key) == asid; + } + + inline void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova) + { +- SMMUIOTLBKey key = {.asid = asid, .iova = iova}; ++ SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova); + + trace_smmu_iotlb_inv_iova(asid, iova); + g_hash_table_remove(s->iotlb, &key); +@@ -382,31 +413,6 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) + return NULL; + } + +-static guint smmu_iotlb_key_hash(gconstpointer v) +-{ +- SMMUIOTLBKey *key = (SMMUIOTLBKey *)v; +- uint32_t a, b, c; +- +- /* Jenkins hash */ +- a = b = c = JHASH_INITVAL + sizeof(*key); +- a += key->asid; +- b += extract64(key->iova, 0, 32); +- c += extract64(key->iova, 32, 32); +- +- __jhash_mix(a, b, c); +- __jhash_final(a, b, c); +- +- return c; +-} +- +-static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) +-{ +- const SMMUIOTLBKey *k1 = v1; +- const SMMUIOTLBKey *k2 = v2; +- +- return (k1->asid == k2->asid) && (k1->iova == k2->iova); +-} +- + /* Unmap the whole notifier's range */ + static void smmu_unmap_notifier_range(IOMMUNotifier *n) + { +diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h +index 7794d6d3947..3104f768cd2 100644 +--- a/hw/arm/smmu-internal.h ++++ b/hw/arm/smmu-internal.h +@@ -96,4 +96,5 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize, + MAKE_64BIT_MASK(0, gsz - 3); + } + ++#define SMMU_IOTLB_ASID(key) ((key).asid) + #endif +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index a28650c9350..bceba40885c 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -155,6 +155,7 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); + + IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); + void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry); ++SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova); + void smmu_iotlb_inv_all(SMMUState *s); + void 
smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); + void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch b/SOURCES/kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch new file mode 100644 index 0000000..ebe3d15 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch @@ -0,0 +1,181 @@ +From fbfa584e58a560f27081043ad8e90ee9022421c0 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:27 -0500 +Subject: [PATCH 03/17] hw/arm/smmu-common: Add IOTLB helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-3-eperezma@redhat.com> +Patchwork-id: 100595 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 02/13] hw/arm/smmu-common: Add IOTLB helpers +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +Add two helpers: one to lookup for a given IOTLB entry and +one to insert a new entry. We also move the tracing there. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-3-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 6808bca939b8722d98165319ba42366ca80de907) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/smmu-common.c | 36 ++++++++++++++++++++++++++++++++++++ + hw/arm/smmuv3.c | 26 ++------------------------ + hw/arm/trace-events | 5 +++-- + include/hw/arm/smmu-common.h | 2 ++ + 4 files changed, 43 insertions(+), 26 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index d2ba8b224ba..8e01505dbee 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -32,6 +32,42 @@ + + /* IOTLB Management */ + ++IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, ++ hwaddr iova) ++{ ++ SMMUIOTLBKey key = {.asid = cfg->asid, .iova = iova}; ++ IOMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); ++ ++ if (entry) { ++ cfg->iotlb_hits++; ++ trace_smmu_iotlb_lookup_hit(cfg->asid, iova, ++ cfg->iotlb_hits, cfg->iotlb_misses, ++ 100 * cfg->iotlb_hits / ++ (cfg->iotlb_hits + cfg->iotlb_misses)); ++ } else { ++ cfg->iotlb_misses++; ++ trace_smmu_iotlb_lookup_miss(cfg->asid, iova, ++ cfg->iotlb_hits, cfg->iotlb_misses, ++ 100 * cfg->iotlb_hits / ++ (cfg->iotlb_hits + cfg->iotlb_misses)); ++ } ++ return entry; ++} ++ ++void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry) ++{ ++ SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); ++ ++ if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { ++ smmu_iotlb_inv_all(bs); ++ } ++ ++ key->asid = cfg->asid; ++ key->iova = entry->iova; ++ trace_smmu_iotlb_insert(cfg->asid, entry->iova); ++ g_hash_table_insert(bs->iotlb, key, entry); ++} ++ + inline void smmu_iotlb_inv_all(SMMUState *s) + { + trace_smmu_iotlb_inv_all(); +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index e2fbb8357ea..34dea4df4da 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -624,7 +624,6 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; +- SMMUIOTLBKey key, *new_key; + + qemu_mutex_lock(&s->mutex); + +@@ -663,16 +662,8 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr 
addr, + page_mask = (1ULL << (tt->granule_sz)) - 1; + aligned_addr = addr & ~page_mask; + +- key.asid = cfg->asid; +- key.iova = aligned_addr; +- +- cached_entry = g_hash_table_lookup(bs->iotlb, &key); ++ cached_entry = smmu_iotlb_lookup(bs, cfg, aligned_addr); + if (cached_entry) { +- cfg->iotlb_hits++; +- trace_smmu_iotlb_cache_hit(cfg->asid, aligned_addr, +- cfg->iotlb_hits, cfg->iotlb_misses, +- 100 * cfg->iotlb_hits / +- (cfg->iotlb_hits + cfg->iotlb_misses)); + if ((flag & IOMMU_WO) && !(cached_entry->perm & IOMMU_WO)) { + status = SMMU_TRANS_ERROR; + if (event.record_trans_faults) { +@@ -686,16 +677,6 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + goto epilogue; + } + +- cfg->iotlb_misses++; +- trace_smmu_iotlb_cache_miss(cfg->asid, addr & ~page_mask, +- cfg->iotlb_hits, cfg->iotlb_misses, +- 100 * cfg->iotlb_hits / +- (cfg->iotlb_hits + cfg->iotlb_misses)); +- +- if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { +- smmu_iotlb_inv_all(bs); +- } +- + cached_entry = g_new0(IOMMUTLBEntry, 1); + + if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { +@@ -741,10 +722,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + } + status = SMMU_TRANS_ERROR; + } else { +- new_key = g_new0(SMMUIOTLBKey, 1); +- new_key->asid = cfg->asid; +- new_key->iova = aligned_addr; +- g_hash_table_insert(bs->iotlb, new_key, cached_entry); ++ smmu_iotlb_insert(bs, cfg, cached_entry); + status = SMMU_TRANS_SUCCESS; + } + +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index 0acedcedc6f..b808a1bfc19 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -14,6 +14,9 @@ smmu_iotlb_inv_all(void) "IOTLB invalidate all" + smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d" + smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 + smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" ++smmu_iotlb_lookup_hit(uint16_t asid, uint64_t addr, uint32_t 
hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" ++smmu_iotlb_lookup_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" ++smmu_iotlb_insert(uint16_t asid, uint64_t addr) "IOTLB ++ asid=%d addr=0x%"PRIx64 + + # smmuv3.c + smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" +@@ -46,8 +49,6 @@ smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d a + smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 + smmuv3_cmdq_tlbi_nh(void) "" + smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" +-smmu_iotlb_cache_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +-smmu_iotlb_cache_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" + smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" + smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" + smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 1f37844e5c9..a28650c9350 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -153,6 +153,8 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); + + #define SMMU_IOTLB_MAX_SIZE 256 + ++IOMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); ++void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, IOMMUTLBEntry *entry); + void smmu_iotlb_inv_all(SMMUState *s); + void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); + void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); +-- +2.27.0 + diff --git 
a/SOURCES/kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch b/SOURCES/kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch new file mode 100644 index 0000000..d973b13 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch @@ -0,0 +1,124 @@ +From 79718d8c67c9c54fa86a77f66aa8784aca7651d5 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:26 -0500 +Subject: [PATCH 02/17] hw/arm/smmu-common: Factorize some code in + smmu_ptw_64() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-2-eperezma@redhat.com> +Patchwork-id: 100594 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 01/13] hw/arm/smmu-common: Factorize some code in smmu_ptw_64() +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +Page and block PTE decoding can share some code. Let's +first handle table PTE and factorize some code shared by +page and block PTEs. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-2-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 1733837d7cdb207653a849a5f1fa78de878c6ac1) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/smmu-common.c | 48 ++++++++++++++++---------------------------- + 1 file changed, 17 insertions(+), 31 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 245817d23e9..d2ba8b224ba 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -187,7 +187,7 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, + uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); + uint64_t mask = subpage_size - 1; + uint32_t offset = iova_level_offset(iova, inputsize, level, granule_sz); +- uint64_t pte; ++ uint64_t pte, gpa; + dma_addr_t pte_addr = baseaddr + offset * sizeof(pte); + uint8_t ap; + +@@ -200,56 +200,42 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, + if (is_invalid_pte(pte) || is_reserved_pte(pte, level)) { + trace_smmu_ptw_invalid_pte(stage, level, baseaddr, + pte_addr, offset, pte); +- info->type = SMMU_PTW_ERR_TRANSLATION; +- goto error; ++ break; + } + +- if (is_page_pte(pte, level)) { +- uint64_t gpa = get_page_pte_address(pte, granule_sz); ++ if (is_table_pte(pte, level)) { ++ ap = PTE_APTABLE(pte); + +- ap = PTE_AP(pte); + if (is_permission_fault(ap, perm)) { + info->type = SMMU_PTW_ERR_PERMISSION; + goto error; + } +- +- tlbe->translated_addr = gpa + (iova & mask); +- tlbe->perm = PTE_AP_TO_PERM(ap); ++ baseaddr = get_table_pte_address(pte, granule_sz); ++ level++; ++ continue; ++ } else if (is_page_pte(pte, level)) { ++ gpa = get_page_pte_address(pte, granule_sz); + trace_smmu_ptw_page_pte(stage, level, iova, + baseaddr, pte_addr, pte, gpa); +- return 0; +- } +- if (is_block_pte(pte, level)) { ++ } else { + uint64_t block_size; +- hwaddr gpa = get_block_pte_address(pte, level, granule_sz, +- &block_size); +- +- ap = PTE_AP(pte); +- if (is_permission_fault(ap, perm)) { +- info->type = SMMU_PTW_ERR_PERMISSION; +- goto error; +- } + ++ gpa = get_block_pte_address(pte, level, granule_sz, ++ &block_size); + trace_smmu_ptw_block_pte(stage, level, baseaddr, + pte_addr, pte, iova, gpa, + block_size >> 20); +- 
+- +- tlbe->translated_addr = gpa + (iova & mask); +- tlbe->perm = PTE_AP_TO_PERM(ap); +- return 0; + } +- +- /* table pte */ +- ap = PTE_APTABLE(pte); +- ++ ap = PTE_AP(pte); + if (is_permission_fault(ap, perm)) { + info->type = SMMU_PTW_ERR_PERMISSION; + goto error; + } +- baseaddr = get_table_pte_address(pte, granule_sz); +- level++; +- } + ++ tlbe->translated_addr = gpa + (iova & mask); ++ tlbe->perm = PTE_AP_TO_PERM(ap); ++ return 0; ++ } + info->type = SMMU_PTW_ERR_TRANSLATION; + + error: +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch b/SOURCES/kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch new file mode 100644 index 0000000..e118225 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch @@ -0,0 +1,274 @@ +From 4770f43dab482e4585d3555933a473cf24e796db Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:30 -0500 +Subject: [PATCH 06/17] hw/arm/smmu-common: Manage IOTLB block entries +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-6-eperezma@redhat.com> +Patchwork-id: 100598 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 05/13] hw/arm/smmu-common: Manage IOTLB block entries +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +At the moment each entry in the IOTLB corresponds to a page sized +mapping (4K, 16K or 64K), even if the page belongs to a mapped +block. In case of block mapping this inefficiently consumes IOTLB +entries. + +Change the value of the entry so that it reflects the actual +mapping it belongs to (block or page start address and size). + +Also the level/tg of the entry is encoded in the key. In subsequent +patches we will enable range invalidation. The latter is able +to provide the level/tg of the entry. 
+ +Encoding the level/tg directly in the key will allow to invalidate +using g_hash_table_remove() when num_pages equals to 1. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-6-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 9e54dee71fcfaae69f87b8e1f51485a832266a39) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/smmu-common.c | 67 ++++++++++++++++++++++++++---------- + hw/arm/smmu-internal.h | 7 ++++ + hw/arm/smmuv3.c | 6 ++-- + hw/arm/trace-events | 2 +- + include/hw/arm/smmu-common.h | 10 ++++-- + 5 files changed, 67 insertions(+), 25 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 06e9e38b007..8007edeaaa2 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -39,7 +39,7 @@ static guint smmu_iotlb_key_hash(gconstpointer v) + + /* Jenkins hash */ + a = b = c = JHASH_INITVAL + sizeof(*key); +- a += key->asid; ++ a += key->asid + key->level + key->tg; + b += extract64(key->iova, 0, 32); + c += extract64(key->iova, 32, 32); + +@@ -51,24 +51,41 @@ static guint smmu_iotlb_key_hash(gconstpointer v) + + static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) + { +- const SMMUIOTLBKey *k1 = v1; +- const SMMUIOTLBKey *k2 = v2; ++ SMMUIOTLBKey *k1 = (SMMUIOTLBKey *)v1, *k2 = (SMMUIOTLBKey *)v2; + +- return (k1->asid == k2->asid) && (k1->iova == k2->iova); ++ return (k1->asid == k2->asid) && (k1->iova == k2->iova) && ++ (k1->level == k2->level) && (k1->tg == k2->tg); + } + +-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova) ++SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, ++ uint8_t tg, uint8_t level) + { +- SMMUIOTLBKey key = {.asid = asid, .iova = iova}; ++ SMMUIOTLBKey key = {.asid = asid, .iova = iova, .tg = tg, .level = level}; + + return key; + } + + SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, +- hwaddr iova) ++ SMMUTransTableInfo *tt, hwaddr iova) + { 
+- SMMUIOTLBKey key = smmu_get_iotlb_key(cfg->asid, iova); +- SMMUTLBEntry *entry = g_hash_table_lookup(bs->iotlb, &key); ++ uint8_t tg = (tt->granule_sz - 10) / 2; ++ uint8_t inputsize = 64 - tt->tsz; ++ uint8_t stride = tt->granule_sz - 3; ++ uint8_t level = 4 - (inputsize - 4) / stride; ++ SMMUTLBEntry *entry = NULL; ++ ++ while (level <= 3) { ++ uint64_t subpage_size = 1ULL << level_shift(level, tt->granule_sz); ++ uint64_t mask = subpage_size - 1; ++ SMMUIOTLBKey key; ++ ++ key = smmu_get_iotlb_key(cfg->asid, iova & ~mask, tg, level); ++ entry = g_hash_table_lookup(bs->iotlb, &key); ++ if (entry) { ++ break; ++ } ++ level++; ++ } + + if (entry) { + cfg->iotlb_hits++; +@@ -89,13 +106,14 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new) + { + SMMUIOTLBKey *key = g_new0(SMMUIOTLBKey, 1); ++ uint8_t tg = (new->granule - 10) / 2; + + if (g_hash_table_size(bs->iotlb) >= SMMU_IOTLB_MAX_SIZE) { + smmu_iotlb_inv_all(bs); + } + +- *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova); +- trace_smmu_iotlb_insert(cfg->asid, new->entry.iova); ++ *key = smmu_get_iotlb_key(cfg->asid, new->entry.iova, tg, new->level); ++ trace_smmu_iotlb_insert(cfg->asid, new->entry.iova, tg, new->level); + g_hash_table_insert(bs->iotlb, key, new); + } + +@@ -114,12 +132,26 @@ static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, + return SMMU_IOTLB_ASID(*iotlb_key) == asid; + } + +-inline void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova) ++static gboolean smmu_hash_remove_by_asid_iova(gpointer key, gpointer value, ++ gpointer user_data) + { +- SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova); ++ SMMUTLBEntry *iter = (SMMUTLBEntry *)value; ++ IOMMUTLBEntry *entry = &iter->entry; ++ SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; ++ SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key; ++ ++ if (info->asid >= 0 && info->asid != 
SMMU_IOTLB_ASID(iotlb_key)) { ++ return false; ++ } ++ return (info->iova & ~entry->addr_mask) == entry->iova; ++} ++ ++inline void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova) ++{ ++ SMMUIOTLBPageInvInfo info = {.asid = asid, .iova = iova}; + + trace_smmu_iotlb_inv_iova(asid, iova); +- g_hash_table_remove(s->iotlb, &key); ++ g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_iova, &info); + } + + inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) +@@ -247,9 +279,6 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, + baseaddr = extract64(tt->ttb, 0, 48); + baseaddr &= ~indexmask; + +- tlbe->entry.iova = iova; +- tlbe->entry.addr_mask = (1 << granule_sz) - 1; +- + while (level <= 3) { + uint64_t subpage_size = 1ULL << level_shift(level, granule_sz); + uint64_t mask = subpage_size - 1; +@@ -299,7 +328,9 @@ static int smmu_ptw_64(SMMUTransCfg *cfg, + goto error; + } + +- tlbe->entry.translated_addr = gpa + (iova & mask); ++ tlbe->entry.translated_addr = gpa; ++ tlbe->entry.iova = iova & ~mask; ++ tlbe->entry.addr_mask = mask; + tlbe->entry.perm = PTE_AP_TO_PERM(ap); + tlbe->level = level; + tlbe->granule = granule_sz; +diff --git a/hw/arm/smmu-internal.h b/hw/arm/smmu-internal.h +index 3104f768cd2..55147f29be4 100644 +--- a/hw/arm/smmu-internal.h ++++ b/hw/arm/smmu-internal.h +@@ -97,4 +97,11 @@ uint64_t iova_level_offset(uint64_t iova, int inputsize, + } + + #define SMMU_IOTLB_ASID(key) ((key).asid) ++ ++typedef struct SMMUIOTLBPageInvInfo { ++ int asid; ++ uint64_t iova; ++ uint64_t mask; ++} SMMUIOTLBPageInvInfo; ++ + #endif +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index ad8212779d3..067c9480a03 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -662,7 +662,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, + page_mask = (1ULL << (tt->granule_sz)) - 1; + aligned_addr = addr & ~page_mask; + +- cached_entry = smmu_iotlb_lookup(bs, cfg, aligned_addr); ++ cached_entry = smmu_iotlb_lookup(bs, cfg, 
tt, aligned_addr); + if (cached_entry) { + if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { + status = SMMU_TRANS_ERROR; +@@ -732,7 +732,7 @@ epilogue: + case SMMU_TRANS_SUCCESS: + entry.perm = flag; + entry.translated_addr = cached_entry->entry.translated_addr + +- (addr & page_mask); ++ (addr & cached_entry->entry.addr_mask); + entry.addr_mask = cached_entry->entry.addr_mask; + trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, + entry.translated_addr, entry.perm); +@@ -960,7 +960,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + + trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); + smmuv3_inv_notifiers_iova(bs, -1, addr); +- smmu_iotlb_inv_all(bs); ++ smmu_iotlb_inv_iova(bs, -1, addr); + break; + } + case SMMU_CMD_TLBI_NH_VA: +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index b808a1bfc19..f74d3e920f1 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -16,7 +16,7 @@ smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr + smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" + smmu_iotlb_lookup_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" + smmu_iotlb_lookup_miss(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +-smmu_iotlb_insert(uint16_t asid, uint64_t addr) "IOTLB ++ asid=%d addr=0x%"PRIx64 ++smmu_iotlb_insert(uint16_t asid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d addr=0x%"PRIx64" tg=%d level=%d" + + # smmuv3.c + smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 277923bdc0a..bbf3abc41fd 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -97,6 +97,8 @@ typedef struct SMMUPciBus { + typedef 
struct SMMUIOTLBKey { + uint64_t iova; + uint16_t asid; ++ uint8_t tg; ++ uint8_t level; + } SMMUIOTLBKey; + + typedef struct SMMUState { +@@ -159,12 +161,14 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid); + + #define SMMU_IOTLB_MAX_SIZE 256 + +-SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, hwaddr iova); ++SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, ++ SMMUTransTableInfo *tt, hwaddr iova); + void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); +-SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova); ++SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, ++ uint8_t tg, uint8_t level); + void smmu_iotlb_inv_all(SMMUState *s); + void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); +-void smmu_iotlb_inv_iova(SMMUState *s, uint16_t asid, dma_addr_t iova); ++void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova); + + /* Unmap the range of all the notifiers registered to any IOMMU mr */ + void smmu_inv_notifiers_all(SMMUState *s); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch b/SOURCES/kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch new file mode 100644 index 0000000..79e75d8 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch @@ -0,0 +1,67 @@ +From 69d71311d3d70282dec3d1f19f9e4b90c7b7c6b9 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:33 -0500 +Subject: [PATCH 09/17] hw/arm/smmuv3: Fix potential integer overflow (CID + 1432363) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-9-eperezma@redhat.com> +Patchwork-id: 100601 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 08/13] hw/arm/smmuv3: Fix potential integer overflow (CID 1432363) +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: 
Philippe Mathieu-Daudé + +Use the BIT_ULL() macro to ensure we use 64-bit arithmetic. +This fixes the following Coverity issue (OVERFLOW_BEFORE_WIDEN): + + CID 1432363 (#1 of 1): Unintentional integer overflow: + + overflow_before_widen: + Potentially overflowing expression 1 << scale with type int + (32 bits, signed) is evaluated using 32-bit arithmetic, and + then used in a context that expects an expression of type + hwaddr (64 bits, unsigned). + +Signed-off-by: Philippe Mathieu-Daudé +Acked-by: Eric Auger +Message-id: 20201030144617.1535064-1-philmd@redhat.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 744a790ec01a30033309e6a2155df4d61061e184) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/smmuv3.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index f4d5d9d8222..a418fab2aa6 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -17,6 +17,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/bitops.h" + #include "hw/irq.h" + #include "hw/sysbus.h" + #include "migration/vmstate.h" +@@ -847,7 +848,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) + scale = CMD_SCALE(cmd); + num = CMD_NUM(cmd); + ttl = CMD_TTL(cmd); +- num_pages = (num + 1) * (1 << (scale)); ++ num_pages = (num + 1) * BIT_ULL(scale); + } + + if (type == SMMU_CMD_TLBI_NH_VA) { +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch b/SOURCES/kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch new file mode 100644 index 0000000..fd52e0c --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch @@ -0,0 +1,255 @@ +From 3f027ac56449e51a61e76c18b97fd341d302dc80 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:32 -0500 +Subject: [PATCH 08/17] hw/arm/smmuv3: Get prepared for range invalidation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-8-eperezma@redhat.com> +Patchwork-id: 100600 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 07/13] hw/arm/smmuv3: Get prepared for range invalidation +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +Enhance the smmu_iotlb_inv_iova() helper with range invalidation. +This uses the new fields passed in the NH_VA and NH_VAA commands: +the size of the range, the level and the granule. + +As NH_VA and NH_VAA both use those fields, their decoding and +handling is factorized in a new smmuv3_s1_range_inval() helper. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-8-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit d52915616c059ed273caa2d496b58e5d215c5962) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/smmu-common.c | 25 +++++++++++--- + hw/arm/smmuv3-internal.h | 4 +++ + hw/arm/smmuv3.c | 64 +++++++++++++++++++++++------------- + hw/arm/trace-events | 4 +-- + include/hw/arm/smmu-common.h | 3 +- + 5 files changed, 69 insertions(+), 31 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 8007edeaaa2..9780404f002 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -143,15 +143,30 @@ static gboolean smmu_hash_remove_by_asid_iova(gpointer key, gpointer value, + if (info->asid >= 0 && info->asid != SMMU_IOTLB_ASID(iotlb_key)) { + return false; + } +- return (info->iova & ~entry->addr_mask) == entry->iova; ++ return ((info->iova & ~entry->addr_mask) == entry->iova) || ++ ((entry->iova & ~info->mask) == info->iova); + } + +-inline void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova) ++inline void ++smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, ++ uint8_t tg, uint64_t num_pages, uint8_t ttl) + { +- SMMUIOTLBPageInvInfo info = {.asid = asid, .iova = iova}; ++ 
if (ttl && (num_pages == 1)) { ++ SMMUIOTLBKey key = smmu_get_iotlb_key(asid, iova, tg, ttl); + +- trace_smmu_iotlb_inv_iova(asid, iova); +- g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_iova, &info); ++ g_hash_table_remove(s->iotlb, &key); ++ } else { ++ /* if tg is not set we use 4KB range invalidation */ ++ uint8_t granule = tg ? tg * 2 + 10 : 12; ++ ++ SMMUIOTLBPageInvInfo info = { ++ .asid = asid, .iova = iova, ++ .mask = (num_pages * 1 << granule) - 1}; ++ ++ g_hash_table_foreach_remove(s->iotlb, ++ smmu_hash_remove_by_asid_iova, ++ &info); ++ } + } + + inline void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) +diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h +index d190181ef1b..a4ec2c591cd 100644 +--- a/hw/arm/smmuv3-internal.h ++++ b/hw/arm/smmuv3-internal.h +@@ -298,6 +298,8 @@ enum { /* Command completion notification */ + }; + + #define CMD_TYPE(x) extract32((x)->word[0], 0 , 8) ++#define CMD_NUM(x) extract32((x)->word[0], 12 , 5) ++#define CMD_SCALE(x) extract32((x)->word[0], 20 , 5) + #define CMD_SSEC(x) extract32((x)->word[0], 10, 1) + #define CMD_SSV(x) extract32((x)->word[0], 11, 1) + #define CMD_RESUME_AC(x) extract32((x)->word[0], 12, 1) +@@ -310,6 +312,8 @@ enum { /* Command completion notification */ + #define CMD_RESUME_STAG(x) extract32((x)->word[2], 0 , 16) + #define CMD_RESP(x) extract32((x)->word[2], 11, 2) + #define CMD_LEAF(x) extract32((x)->word[2], 0 , 1) ++#define CMD_TTL(x) extract32((x)->word[2], 8 , 2) ++#define CMD_TG(x) extract32((x)->word[2], 10, 2) + #define CMD_STE_RANGE(x) extract32((x)->word[2], 0 , 5) + #define CMD_ADDR(x) ({ \ + uint64_t high = (uint64_t)(x)->word[3]; \ +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index ae2b769f891..f4d5d9d8222 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -773,42 +773,49 @@ epilogue: + * @n: notifier to be called + * @asid: address space ID or negative value if we don't care + * @iova: iova ++ * @tg: translation granule (if communicated 
through range invalidation) ++ * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1 + */ + static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + IOMMUNotifier *n, +- int asid, +- dma_addr_t iova) ++ int asid, dma_addr_t iova, ++ uint8_t tg, uint64_t num_pages) + { + SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); +- SMMUEventInfo event = {.inval_ste_allowed = true}; +- SMMUTransTableInfo *tt; +- SMMUTransCfg *cfg; + IOMMUTLBEntry entry; ++ uint8_t granule = tg; + +- cfg = smmuv3_get_config(sdev, &event); +- if (!cfg) { +- return; +- } ++ if (!tg) { ++ SMMUEventInfo event = {.inval_ste_allowed = true}; ++ SMMUTransCfg *cfg = smmuv3_get_config(sdev, &event); ++ SMMUTransTableInfo *tt; + +- if (asid >= 0 && cfg->asid != asid) { +- return; +- } ++ if (!cfg) { ++ return; ++ } + +- tt = select_tt(cfg, iova); +- if (!tt) { +- return; ++ if (asid >= 0 && cfg->asid != asid) { ++ return; ++ } ++ ++ tt = select_tt(cfg, iova); ++ if (!tt) { ++ return; ++ } ++ granule = tt->granule_sz; + } + + entry.target_as = &address_space_memory; + entry.iova = iova; +- entry.addr_mask = (1 << tt->granule_sz) - 1; ++ entry.addr_mask = num_pages * (1 << granule) - 1; + entry.perm = IOMMU_NONE; + + memory_region_notify_one(n, &entry); + } + +-/* invalidate an asid/iova tuple in all mr's */ +-static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) ++/* invalidate an asid/iova range tuple in all mr's */ ++static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, ++ uint8_t tg, uint64_t num_pages) + { + SMMUDevice *sdev; + +@@ -816,28 +823,39 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) + IOMMUMemoryRegion *mr = &sdev->iommu; + IOMMUNotifier *n; + +- trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova); ++ trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova, ++ tg, num_pages); + + IOMMU_NOTIFIER_FOREACH(n, mr) { +- smmuv3_notify_iova(mr, n, asid, iova); ++ 
smmuv3_notify_iova(mr, n, asid, iova, tg, num_pages); + } + } + } + + static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) + { ++ uint8_t scale = 0, num = 0, ttl = 0; + dma_addr_t addr = CMD_ADDR(cmd); + uint8_t type = CMD_TYPE(cmd); + uint16_t vmid = CMD_VMID(cmd); + bool leaf = CMD_LEAF(cmd); ++ uint8_t tg = CMD_TG(cmd); ++ hwaddr num_pages = 1; + int asid = -1; + ++ if (tg) { ++ scale = CMD_SCALE(cmd); ++ num = CMD_NUM(cmd); ++ ttl = CMD_TTL(cmd); ++ num_pages = (num + 1) * (1 << (scale)); ++ } ++ + if (type == SMMU_CMD_TLBI_NH_VA) { + asid = CMD_ASID(cmd); + } +- trace_smmuv3_s1_range_inval(vmid, asid, addr, leaf); +- smmuv3_inv_notifiers_iova(s, asid, addr); +- smmu_iotlb_inv_iova(s, asid, addr); ++ trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); ++ smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); ++ smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); + } + + static int smmuv3_cmdq_consume(SMMUv3State *s) +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index c219fe9e828..3d905e0f7d0 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -45,11 +45,11 @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%d - end=0x%d" + smmuv3_cmdq_cfgi_cd(uint32_t sid) "streamid = %d" + smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid %d (hits=%d, misses=%d, hit rate=%d)" + smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" +-smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" ++smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d" + smmuv3_cmdq_tlbi_nh(void) "" + smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" + smmuv3_config_cache_inv(uint32_t sid) 
"Config cache INV for sid %d" + smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" + smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" +-smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova) "iommu mr=%s asid=%d iova=0x%"PRIx64 ++smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 + +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index bbf3abc41fd..13489a1ac0d 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -168,7 +168,8 @@ SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint64_t iova, + uint8_t tg, uint8_t level); + void smmu_iotlb_inv_all(SMMUState *s); + void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); +-void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova); ++void smmu_iotlb_inv_iova(SMMUState *s, int asid, dma_addr_t iova, ++ uint8_t tg, uint64_t num_pages, uint8_t ttl); + + /* Unmap the range of all the notifiers registered to any IOMMU mr */ + void smmu_inv_notifiers_all(SMMUState *s); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch b/SOURCES/kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch new file mode 100644 index 0000000..e77c403 --- /dev/null +++ b/SOURCES/kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch @@ -0,0 +1,115 @@ +From c4ae2dbb8ee406f0a015b35fb76b3d6d131900d6 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:31 -0500 +Subject: [PATCH 07/17] hw/arm/smmuv3: Introduce smmuv3_s1_range_inval() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-7-eperezma@redhat.com> +Patchwork-id: 100599 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 06/13] hw/arm/smmuv3: Introduce 
smmuv3_s1_range_inval() helper +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +From: Eric Auger + +Let's introduce an helper for S1 IOVA range invalidation. +This will be used for NH_VA and NH_VAA commands. It decodes +the same fields, trace, calls the UNMAP notifiers and +invalidate the corresponding IOTLB entries. + +At the moment, we do not support 3.2 range invalidation yet. +So it reduces to a single IOVA invalidation. + +Note the leaf bit now is also decoded for the CMD_TLBI_NH_VAA +command. At the moment it is only used for tracing. + +Signed-off-by: Eric Auger +Reviewed-by: Peter Maydell +Message-id: 20200728150815.11446-7-eric.auger@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit c0f9ef70377cfcbd0fa6559d5dc729a930d71b7c) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/smmuv3.c | 36 +++++++++++++++++------------------- + hw/arm/trace-events | 3 +-- + 2 files changed, 18 insertions(+), 21 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 067c9480a03..ae2b769f891 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -824,6 +824,22 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) + } + } + ++static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) ++{ ++ dma_addr_t addr = CMD_ADDR(cmd); ++ uint8_t type = CMD_TYPE(cmd); ++ uint16_t vmid = CMD_VMID(cmd); ++ bool leaf = CMD_LEAF(cmd); ++ int asid = -1; ++ ++ if (type == SMMU_CMD_TLBI_NH_VA) { ++ asid = CMD_ASID(cmd); ++ } ++ trace_smmuv3_s1_range_inval(vmid, asid, addr, leaf); ++ smmuv3_inv_notifiers_iova(s, asid, addr); ++ smmu_iotlb_inv_iova(s, asid, addr); ++} ++ + static int smmuv3_cmdq_consume(SMMUv3State *s) + { + SMMUState *bs = ARM_SMMU(s); +@@ -954,27 +970,9 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + smmu_iotlb_inv_all(bs); + break; + case SMMU_CMD_TLBI_NH_VAA: +- { +- dma_addr_t addr = CMD_ADDR(&cmd); +- uint16_t vmid = CMD_VMID(&cmd); +- 
+- trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); +- smmuv3_inv_notifiers_iova(bs, -1, addr); +- smmu_iotlb_inv_iova(bs, -1, addr); +- break; +- } + case SMMU_CMD_TLBI_NH_VA: +- { +- uint16_t asid = CMD_ASID(&cmd); +- uint16_t vmid = CMD_VMID(&cmd); +- dma_addr_t addr = CMD_ADDR(&cmd); +- bool leaf = CMD_LEAF(&cmd); +- +- trace_smmuv3_cmdq_tlbi_nh_va(vmid, asid, addr, leaf); +- smmuv3_inv_notifiers_iova(bs, asid, addr); +- smmu_iotlb_inv_iova(bs, asid, addr); ++ smmuv3_s1_range_inval(bs, &cmd); + break; +- } + case SMMU_CMD_TLBI_EL3_ALL: + case SMMU_CMD_TLBI_EL3_VA: + case SMMU_CMD_TLBI_EL2_ALL: +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index f74d3e920f1..c219fe9e828 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -45,8 +45,7 @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%d - end=0x%d" + smmuv3_cmdq_cfgi_cd(uint32_t sid) "streamid = %d" + smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid %d (hits=%d, misses=%d, hit rate=%d)" + smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" +-smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" +-smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 ++smmuv3_s1_range_inval(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" + smmuv3_cmdq_tlbi_nh(void) "" + smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" + smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-ehci-check-return-value-of-usb_packet_map.patch b/SOURCES/kvm-hw-ehci-check-return-value-of-usb_packet_map.patch new file mode 100644 index 0000000..3e3ed87 --- /dev/null +++ b/SOURCES/kvm-hw-ehci-check-return-value-of-usb_packet_map.patch @@ -0,0 +1,61 @@ +From 
6955223aa15ab6ea53322218ec03fb3dc2b776f8 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Jan 2021 00:07:05 -0500 +Subject: [PATCH 16/17] hw: ehci: check return value of 'usb_packet_map' + +RH-Author: Jon Maloy +Message-id: <20210114000705.945169-2-jmaloy@redhat.com> +Patchwork-id: 100634 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] hw: ehci: check return value of 'usb_packet_map' +Bugzilla: 1898628 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Li Qiang + +If 'usb_packet_map' fails, we should stop to process the usb +request. + +Signed-off-by: Li Qiang +Message-Id: <20200812161727.29412-1-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 2fdb42d840400d58f2e706ecca82c142b97bcbd6) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + hw/usb/hcd-ehci.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 56ab2f457f4..024b1ed6b67 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -1374,7 +1374,10 @@ static int ehci_execute(EHCIPacket *p, const char *action) + spd = (p->pid == USB_TOKEN_IN && NLPTR_TBIT(p->qtd.altnext) == 0); + usb_packet_setup(&p->packet, p->pid, ep, 0, p->qtdaddr, spd, + (p->qtd.token & QTD_TOKEN_IOC) != 0); +- usb_packet_map(&p->packet, &p->sgl); ++ if (usb_packet_map(&p->packet, &p->sgl)) { ++ qemu_sglist_destroy(&p->sgl); ++ return -1; ++ } + p->async = EHCI_ASYNC_INITIALIZED; + } + +@@ -1453,7 +1456,10 @@ static int ehci_process_itd(EHCIState *ehci, + if (ep && ep->type == USB_ENDPOINT_XFER_ISOC) { + usb_packet_setup(&ehci->ipacket, pid, ep, 0, addr, false, + (itd->transact[i] & ITD_XACT_IOC) != 0); +- usb_packet_map(&ehci->ipacket, &ehci->isgl); ++ if (usb_packet_map(&ehci->ipacket, &ehci->isgl)) { ++ qemu_sglist_destroy(&ehci->isgl); ++ return -1; ++ } + usb_handle_packet(dev, &ehci->ipacket); + usb_packet_unmap(&ehci->ipacket, &ehci->isgl); + } else { +-- +2.27.0 + 
diff --git a/SOURCES/kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch b/SOURCES/kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch new file mode 100644 index 0000000..cf9f6ab --- /dev/null +++ b/SOURCES/kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch @@ -0,0 +1,62 @@ +From d48034cc2b331313995c1d19060decc0e5ca1356 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Jan 2021 01:35:41 -0500 +Subject: [PATCH 17/17] hw/net/e1000e: advance desc_offset in case of null + descriptor +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20210114013541.956735-2-jmaloy@redhat.com> +Patchwork-id: 100638 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] hw/net/e1000e: advance desc_offset in case of null descriptor +Bugzilla: 1903070 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Thomas Huth + +From: Prasad J Pandit + +While receiving packets via e1000e_write_packet_to_guest() routine, +'desc_offset' is advanced only when RX descriptor is processed. And +RX descriptor is not processed if it has NULL buffer address. +This may lead to an infinite loop condition. Increament 'desc_offset' +to process next descriptor in the ring to avoid infinite loop. + +Reported-by: Cheol-woo Myung <330cjfdn@gmail.com> +Signed-off-by: Prasad J Pandit +Signed-off-by: Jason Wang + +(cherry picked from c2cb511634012344e3d0fe49a037a33b12d8a98a) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/e1000e_core.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c +index 9b76f82db5b..166054f2e3f 100644 +--- a/hw/net/e1000e_core.c ++++ b/hw/net/e1000e_core.c +@@ -1596,13 +1596,13 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, + (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); + } + } +- desc_offset += desc_size; +- if (desc_offset >= total_size) { +- is_last = true; +- } + } else { /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + } ++ desc_offset += desc_size; ++ if (desc_offset >= total_size) { ++ is_last = true; ++ } + + e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, + rss_info, do_ps ? ps_hdr_len : 0, &bastate.written); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch b/SOURCES/kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch new file mode 100644 index 0000000..228bdff --- /dev/null +++ b/SOURCES/kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch @@ -0,0 +1,56 @@ +From 94ca0eddc117b57da009dacb19740fc8ae00143a Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 28 Sep 2020 18:27:35 -0400 +Subject: [PATCH] hw/net/net_tx_pkt: fix assertion failure in + net_tx_pkt_add_raw_fragment() + +RH-Author: Jon Maloy +Message-id: <20200928182735.1008839-2-jmaloy@redhat.com> +Patchwork-id: 98497 +O-Subject: [RHEL-8.0.0 qemu-kvm PATCH 1/1] hw/net/net_tx_pkt: fix assertion failure in net_tx_pkt_add_raw_fragment() +Bugzilla: 1860994 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Xiao Wang +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefan Hajnoczi + +From: Mauro Matteo Cascella + +An assertion failure issue was found in the code that processes network packets +while adding data fragments into the packet context. It could be abused by a +malicious guest to abort the QEMU process on the host. 
This patch replaces the +affected assert() with a conditional statement, returning false if the current +data fragment exceeds max_raw_frags. + +Reported-by: Alexander Bulekov +Reported-by: Ziming Zhang +Reviewed-by: Dmitry Fleytman +Signed-off-by: Mauro Matteo Cascella +Signed-off-by: Jason Wang + +(cherry picked from commit 035e69b063835a5fd23cacabd63690a3d84532a8) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/net_tx_pkt.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c +index 162f802dd77..54d4c3bbd02 100644 +--- a/hw/net/net_tx_pkt.c ++++ b/hw/net/net_tx_pkt.c +@@ -379,7 +379,10 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, + hwaddr mapped_len = 0; + struct iovec *ventry; + assert(pkt); +- assert(pkt->max_raw_frags > pkt->raw_frags); ++ ++ if (pkt->raw_frags >= pkt->max_raw_frags) { ++ return false; ++ } + + if (!len) { + return true; +-- +2.27.0 + diff --git a/SOURCES/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch b/SOURCES/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch new file mode 100644 index 0000000..5c335f8 --- /dev/null +++ b/SOURCES/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch @@ -0,0 +1,213 @@ +From 4daa8dca77edec191dfe0ae4a0a9fc70f8f63607 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 24 Feb 2021 11:30:37 -0500 +Subject: [PATCH 4/4] i386: Add the support for AMD EPYC 3rd generation + processors + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210224113037.15599-5-dgilbert@redhat.com> +Patchwork-id: 101202 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/4] i386: Add the support for AMD EPYC 3rd generation processors +Bugzilla: 1790620 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Peter Xu + +From: Babu Moger + +Adds the support for AMD 3rd generation processors. The model +display for the new processor will be EPYC-Milan. 
+ +Adds the following new feature bits on top of the feature bits from +the first and second generation EPYC models. + +pcid : Process context identifiers support +ibrs : Indirect Branch Restricted Speculation +ssbd : Speculative Store Bypass Disable +erms : Enhanced REP MOVSB/STOSB support +fsrm : Fast Short REP MOVSB support +invpcid : Invalidate processor context ID +pku : Protection keys support +svme-addr-chk : SVM instructions address check for #GP handling + +Depends on the following kernel commits: +14c2bf81fcd2 ("KVM: SVM: Fix #GP handling for doubly-nested virtualization") +3b9c723ed7cf ("KVM: SVM: Add support for SVM instruction address check change") +4aa2691dcbd3 ("8ce1c461188799d863398dd2865d KVM: x86: Factor out x86 instruction emulation with decoding") +4407a797e941 ("KVM: SVM: Enable INVPCID feature on AMD") +9715092f8d7e ("KVM: X86: Move handling of INVPCID types to x86") +3f3393b3ce38 ("KVM: X86: Rename and move the function vmx_handle_memory_failure to x86.c") +830bd71f2c06 ("KVM: SVM: Remove set_cr_intercept, clr_cr_intercept and is_cr_intercept") +4c44e8d6c193 ("KVM: SVM: Add new intercept word in vmcb_control_area") +c62e2e94b9d4 ("KVM: SVM: Modify 64 bit intercept field to two 32 bit vectors") +9780d51dc2af ("KVM: SVM: Modify intercept_exceptions to generic intercepts") +30abaa88382c ("KVM: SVM: Change intercept_dr to generic intercepts") +03bfeeb988a9 ("KVM: SVM: Change intercept_cr to generic intercepts") +c45ad7229d13 ("KVM: SVM: Introduce vmcb_(set_intercept/clr_intercept/_is_intercept)") +a90c1ed9f11d ("(pcid) KVM: nSVM: Remove unused field") +fa44b82eb831 ("KVM: x86: Move MPK feature detection to common code") +38f3e775e9c2 ("x86/Kconfig: Update config and kernel doc for MPK feature on AMD") +37486135d3a7 ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") + +Signed-off-by: Babu Moger +Message-Id: <161290460478.11352.8933244555799318236.stgit@bmoger-ubuntu> +Signed-off-by: Eduardo Habkost +(cherry picked from 
commit 623972ceae091b31331ae4a1dc94fe5cbb891937) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 107 +++++++++++++++++++++++++++++++++++++++++++++- + target/i386/cpu.h | 4 ++ + 2 files changed, 110 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 7227c803c3..d5b0d4b7f0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1133,7 +1133,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "clzero", NULL, "xsaveerptr", NULL, + NULL, NULL, NULL, NULL, + NULL, "wbnoinvd", NULL, NULL, +- "ibpb", NULL, NULL, "amd-stibp", ++ "ibpb", NULL, "ibrs", "amd-stibp", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, +@@ -1853,6 +1853,56 @@ static CPUCaches epyc_rome_cache_info = { + }, + }; + ++static CPUCaches epyc_milan_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 32 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 32768, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = true, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4124,6 +4174,61 @@ static 
X86CPUDefinition builtin_x86_defs[] = { + .model_id = "AMD EPYC-Rome Processor", + .cache_info = &epyc_rome_cache_info, + }, ++ { ++ .name = "EPYC-Milan", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 25, ++ .model = 1, ++ .stepping = 1, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | ++ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | ++ CPUID_EXT_PCID, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | ++ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | ++ CPUID_8000_0008_EBX_AMD_SSBD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | ++ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | ++ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID | 
CPUID_7_0_ECX_PKU, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM, ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_SVME_ADDR_CHK, ++ .xlevel = 0x8000001E, ++ .model_id = "AMD EPYC-Milan Processor", ++ .cache_info = &epyc_milan_cache_info, ++ }, + }; + + /* KVM-specific features that are automatically added/removed +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index e1b67910c2..7a3aa40201 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -800,8 +800,12 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) + /* Indirect Branch Prediction Barrier */ + #define CPUID_8000_0008_EBX_IBPB (1U << 12) ++/* Indirect Branch Restricted Speculation */ ++#define CPUID_8000_0008_EBX_IBRS (1U << 14) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_8000_0008_EBX_STIBP (1U << 15) ++/* Speculative Store Bypass Disable */ ++#define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.27.0 + diff --git a/SOURCES/kvm-ide-atapi-check-logical-block-address-and-read-size-.patch b/SOURCES/kvm-ide-atapi-check-logical-block-address-and-read-size-.patch new file mode 100644 index 0000000..706bd8b --- /dev/null +++ b/SOURCES/kvm-ide-atapi-check-logical-block-address-and-read-size-.patch @@ -0,0 +1,120 @@ +From 0453588f95294ed5ce912cb8b810a322bf9d91e0 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 25 Feb 2021 19:43:02 -0500 +Subject: [PATCH] ide: atapi: check logical block address and read size + (CVE-2020-29443) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20210225194302.3137699-2-jmaloy@redhat.com> +Patchwork-id: 101208 +O-Subject: [RHEL-8.4.0 qemu-kvm 
PATCH v2 1/1] ide: atapi: check logical block address and read size (CVE-2020-29443) +Bugzilla: 1917451 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Danilo de Paula +RH-Acked-by: Paolo Bonzini + +From: Prasad J Pandit + +While processing ATAPI cmd_read/cmd_read_cd commands, +Logical Block Address (LBA) maybe invalid OR closer to the last block, +leading to an OOB access issues. Add range check to avoid it. + +Fixes: CVE-2020-29443 +Reported-by: Wenxiang Qian +Suggested-by: Paolo Bonzini +Reviewed-by: Paolo Bonzini +Signed-off-by: Prasad J Pandit +Message-Id: <20210118115130.457044-1-ppandit@redhat.com> +Signed-off-by: Paolo Bonzini + +(cherry picked from commit b8d7f1bc59276fec85e4d09f1567613a3e14d31e) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + hw/ide/atapi.c | 30 ++++++++++++++++++++++++------ + 1 file changed, 24 insertions(+), 6 deletions(-) + +diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c +index 17a9d635d8..d064935c8d 100644 +--- a/hw/ide/atapi.c ++++ b/hw/ide/atapi.c +@@ -320,6 +320,8 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) + static void ide_atapi_cmd_read_pio(IDEState *s, int lba, int nb_sectors, + int sector_size) + { ++ assert(0 <= lba && lba < (s->nb_sectors >> 2)); ++ + s->lba = lba; + s->packet_transfer_size = nb_sectors * sector_size; + s->elementary_transfer_size = 0; +@@ -418,6 +420,8 @@ eot: + static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors, + int sector_size) + { ++ assert(0 <= lba && lba < (s->nb_sectors >> 2)); ++ + s->lba = lba; + s->packet_transfer_size = nb_sectors * sector_size; + s->io_buffer_size = 0; +@@ -971,35 +975,49 @@ static void cmd_prevent_allow_medium_removal(IDEState *s, uint8_t* buf) + + static void cmd_read(IDEState *s, uint8_t* buf) + { +- int nb_sectors, lba; ++ unsigned int nb_sectors, lba; ++ ++ /* Total logical sectors of ATAPI_SECTOR_SIZE(=2048) bytes */ ++ uint64_t total_sectors = s->nb_sectors >> 2; + + if (buf[0] == GPCMD_READ_10) { 
+ nb_sectors = lduw_be_p(buf + 7); + } else { + nb_sectors = ldl_be_p(buf + 6); + } +- +- lba = ldl_be_p(buf + 2); + if (nb_sectors == 0) { + ide_atapi_cmd_ok(s); + return; + } + ++ lba = ldl_be_p(buf + 2); ++ if (lba >= total_sectors || lba + nb_sectors - 1 >= total_sectors) { ++ ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_LOGICAL_BLOCK_OOR); ++ return; ++ } ++ + ide_atapi_cmd_read(s, lba, nb_sectors, 2048); + } + + static void cmd_read_cd(IDEState *s, uint8_t* buf) + { +- int nb_sectors, lba, transfer_request; ++ unsigned int nb_sectors, lba, transfer_request; + +- nb_sectors = (buf[6] << 16) | (buf[7] << 8) | buf[8]; +- lba = ldl_be_p(buf + 2); ++ /* Total logical sectors of ATAPI_SECTOR_SIZE(=2048) bytes */ ++ uint64_t total_sectors = s->nb_sectors >> 2; + ++ nb_sectors = (buf[6] << 16) | (buf[7] << 8) | buf[8]; + if (nb_sectors == 0) { + ide_atapi_cmd_ok(s); + return; + } + ++ lba = ldl_be_p(buf + 2); ++ if (lba >= total_sectors || lba + nb_sectors - 1 >= total_sectors) { ++ ide_atapi_cmd_error(s, ILLEGAL_REQUEST, ASC_LOGICAL_BLOCK_OOR); ++ return; ++ } ++ + transfer_request = buf[9] & 0xf8; + if (transfer_request == 0x00) { + /* nothing */ +-- +2.27.0 + diff --git a/SOURCES/kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch b/SOURCES/kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch new file mode 100644 index 0000000..db89a06 --- /dev/null +++ b/SOURCES/kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch @@ -0,0 +1,58 @@ +From d8f84a8086dbe339a9f97dbcd10abd6379525068 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:37 -0500 +Subject: [PATCH 13/17] intel_iommu: Skip page walking on device iotlb + invalidations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-13-eperezma@redhat.com> +Patchwork-id: 100605 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 12/13] intel_iommu: Skip page walking on device iotlb 
invalidations +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +Although they didn't reach the notifier because of the filtering in +memory_region_notify_iommu_one, the vt-d was still splitting huge +memory invalidations in chunks. Skipping it. + +This improves performance in case of netperf with vhost-net: +* TCP_STREAM: From 1923.6Mbit/s to 2175.13Mbit/s (13%) +* TCP_RR: From 8464.73 trans/s to 8932.703333 trans/s (5.5%) +* UDP_RR: From 8562.08 trans/s to 9005.62/s (5.1%) +* UDP_STREAM: No change observed (insignificant 0.1% improvement) + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-5-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit f7701e2c7983b680790af47117577b285b6a1aed) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/intel_iommu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 3640bc2ed15..2b270f06645 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -1421,6 +1421,10 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) + VTDContextEntry ce; + IOMMUNotifier *n; + ++ if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) { ++ return 0; ++ } ++ + ret = vtd_dev_to_context_entry(vtd_as->iommu_state, + pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce); +-- +2.27.0 + diff --git a/SOURCES/kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch b/SOURCES/kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch new file mode 100644 index 0000000..0e55df4 --- /dev/null +++ b/SOURCES/kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch @@ -0,0 +1,290 @@ +From cadb72854b44f53c07ea60d7a6149ccac5928a82 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Tue, 27 Oct 2020 12:02:15 -0400 +Subject: [PATCH 02/18] libvhost-user: handle endianness as mandated 
by the + spec + +RH-Author: Claudio Imbrenda +Message-id: <20201027120217.2997314-2-cimbrend@redhat.com> +Patchwork-id: 98723 +O-Subject: [RHEL8.4 qemu-kvm PATCH 1/3] libvhost-user: handle endianness as mandated by the spec +Bugzilla: 1857733 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +From: Marc Hartmayer + +Since virtio existed even before it got standardized, the virtio +standard defines the following types of virtio devices: + + + legacy device (pre-virtio 1.0) + + non-legacy or VIRTIO 1.0 device + + transitional device (which can act both as legacy and non-legacy) + +Virtio 1.0 defines the fields of the virtqueues as little endian, +while legacy uses guest's native endian [1]. Currently libvhost-user +does not handle virtio endianness at all, i.e. it works only if the +native endianness matches with whatever is actually needed. That means +things break spectacularly on big-endian targets. Let us handle virtio +endianness for non-legacy as required by the virtio specification [1] +and fence legacy virtio, as there is no safe way to figure out the +needed endianness conversions for all cases. The fencing of legacy +virtio devices is done in `vu_set_features_exec`. + +[1] https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.html#x1-210003 + +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Marc Hartmayer +Message-id: 20200901150019.29229-3-mhartmay@linux.ibm.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 2ffc54708087c6e524297957be2fc5d543abb767) +Signed-off-by: Danilo C. L. 
de Paula +--- + contrib/libvhost-user/libvhost-user.c | 77 +++++++++++++++------------ + 1 file changed, 43 insertions(+), 34 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index b89bf185013..b8350b067e3 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -42,6 +42,7 @@ + + #include "qemu/atomic.h" + #include "qemu/osdep.h" ++#include "qemu/bswap.h" + #include "qemu/memfd.h" + + #include "libvhost-user.h" +@@ -522,6 +523,14 @@ vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg) + DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + + dev->features = vmsg->payload.u64; ++ if (!vu_has_feature(dev, VIRTIO_F_VERSION_1)) { ++ /* ++ * We only support devices conforming to VIRTIO 1.0 or ++ * later ++ */ ++ vu_panic(dev, "virtio legacy devices aren't supported by libvhost-user"); ++ return false; ++ } + + if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) { + vu_set_enable_all_rings(dev, true); +@@ -886,7 +895,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) + return false; + } + +- vq->used_idx = vq->vring.used->idx; ++ vq->used_idx = lduw_le_p(&vq->vring.used->idx); + + if (vq->last_avail_idx != vq->used_idx) { + bool resume = dev->iface->queue_is_processed_in_order && +@@ -998,7 +1007,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq) + return 0; + } + +- vq->used_idx = vq->vring.used->idx; ++ vq->used_idx = lduw_le_p(&vq->vring.used->idx); + vq->resubmit_num = 0; + vq->resubmit_list = NULL; + vq->counter = 0; +@@ -1737,13 +1746,13 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq) + static inline uint16_t + vring_avail_flags(VuVirtq *vq) + { +- return vq->vring.avail->flags; ++ return lduw_le_p(&vq->vring.avail->flags); + } + + static inline uint16_t + vring_avail_idx(VuVirtq *vq) + { +- vq->shadow_avail_idx = vq->vring.avail->idx; ++ vq->shadow_avail_idx = lduw_le_p(&vq->vring.avail->idx); + + return vq->shadow_avail_idx; + } +@@ 
-1751,7 +1760,7 @@ vring_avail_idx(VuVirtq *vq) + static inline uint16_t + vring_avail_ring(VuVirtq *vq, int i) + { +- return vq->vring.avail->ring[i]; ++ return lduw_le_p(&vq->vring.avail->ring[i]); + } + + static inline uint16_t +@@ -1839,12 +1848,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc, + int i, unsigned int max, unsigned int *next) + { + /* If this descriptor says it doesn't chain, we're done. */ +- if (!(desc[i].flags & VRING_DESC_F_NEXT)) { ++ if (!(lduw_le_p(&desc[i].flags) & VRING_DESC_F_NEXT)) { + return VIRTQUEUE_READ_DESC_DONE; + } + + /* Check they're not leading us off end of descriptors. */ +- *next = desc[i].next; ++ *next = lduw_le_p(&desc[i].next); + /* Make sure compiler knows to grab that: we don't want it changing! */ + smp_wmb(); + +@@ -1887,8 +1896,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, + } + desc = vq->vring.desc; + +- if (desc[i].flags & VRING_DESC_F_INDIRECT) { +- if (desc[i].len % sizeof(struct vring_desc)) { ++ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) { ++ if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) { + vu_panic(dev, "Invalid size for indirect buffer table"); + goto err; + } +@@ -1901,8 +1910,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, + + /* loop over the indirect descriptor table */ + indirect = 1; +- desc_addr = desc[i].addr; +- desc_len = desc[i].len; ++ desc_addr = ldq_le_p(&desc[i].addr); ++ desc_len = ldl_le_p(&desc[i].len); + max = desc_len / sizeof(struct vring_desc); + read_len = desc_len; + desc = vu_gpa_to_va(dev, &read_len, desc_addr); +@@ -1929,10 +1938,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, + goto err; + } + +- if (desc[i].flags & VRING_DESC_F_WRITE) { +- in_total += desc[i].len; ++ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) { ++ in_total += ldl_le_p(&desc[i].len); + } else { +- out_total += desc[i].len; ++ out_total += ldl_le_p(&desc[i].len); + } + 
if (in_total >= max_in_bytes && out_total >= max_out_bytes) { + goto done; +@@ -2047,7 +2056,7 @@ vring_used_flags_set_bit(VuVirtq *vq, int mask) + + flags = (uint16_t *)((char*)vq->vring.used + + offsetof(struct vring_used, flags)); +- *flags |= mask; ++ stw_le_p(flags, lduw_le_p(flags) | mask); + } + + static inline void +@@ -2057,7 +2066,7 @@ vring_used_flags_unset_bit(VuVirtq *vq, int mask) + + flags = (uint16_t *)((char*)vq->vring.used + + offsetof(struct vring_used, flags)); +- *flags &= ~mask; ++ stw_le_p(flags, lduw_le_p(flags) & ~mask); + } + + static inline void +@@ -2067,7 +2076,7 @@ vring_set_avail_event(VuVirtq *vq, uint16_t val) + return; + } + +- *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val; ++ stw_le_p(&vq->vring.used->ring[vq->vring.num], val); + } + + void +@@ -2156,14 +2165,14 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; + int rc; + +- if (desc[i].flags & VRING_DESC_F_INDIRECT) { +- if (desc[i].len % sizeof(struct vring_desc)) { ++ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) { ++ if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) { + vu_panic(dev, "Invalid size for indirect buffer table"); + } + + /* loop over the indirect descriptor table */ +- desc_addr = desc[i].addr; +- desc_len = desc[i].len; ++ desc_addr = ldq_le_p(&desc[i].addr); ++ desc_len = ldl_le_p(&desc[i].len); + max = desc_len / sizeof(struct vring_desc); + read_len = desc_len; + desc = vu_gpa_to_va(dev, &read_len, desc_addr); +@@ -2185,10 +2194,10 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) + + /* Collect all the descriptors */ + do { +- if (desc[i].flags & VRING_DESC_F_WRITE) { ++ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) { + virtqueue_map_desc(dev, &in_num, iov + out_num, + VIRTQUEUE_MAX_SIZE - out_num, true, +- desc[i].addr, desc[i].len); ++ ldq_le_p(&desc[i].addr), ldl_le_p(&desc[i].len)); + } else { + if (in_num) { + 
vu_panic(dev, "Incorrect order for descriptors"); +@@ -2196,7 +2205,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz) + } + virtqueue_map_desc(dev, &out_num, iov, + VIRTQUEUE_MAX_SIZE, false, +- desc[i].addr, desc[i].len); ++ ldq_le_p(&desc[i].addr), ldl_le_p(&desc[i].len)); + } + + /* If we've got too many, that implies a descriptor loop. */ +@@ -2392,14 +2401,14 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq, + max = vq->vring.num; + i = elem->index; + +- if (desc[i].flags & VRING_DESC_F_INDIRECT) { +- if (desc[i].len % sizeof(struct vring_desc)) { ++ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) { ++ if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) { + vu_panic(dev, "Invalid size for indirect buffer table"); + } + + /* loop over the indirect descriptor table */ +- desc_addr = desc[i].addr; +- desc_len = desc[i].len; ++ desc_addr = ldq_le_p(&desc[i].addr); ++ desc_len = ldl_le_p(&desc[i].len); + max = desc_len / sizeof(struct vring_desc); + read_len = desc_len; + desc = vu_gpa_to_va(dev, &read_len, desc_addr); +@@ -2425,9 +2434,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq, + return; + } + +- if (desc[i].flags & VRING_DESC_F_WRITE) { +- min = MIN(desc[i].len, len); +- vu_log_write(dev, desc[i].addr, min); ++ if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) { ++ min = MIN(ldl_le_p(&desc[i].len), len); ++ vu_log_write(dev, ldq_le_p(&desc[i].addr), min); + len -= min; + } + +@@ -2452,15 +2461,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq, + + idx = (idx + vq->used_idx) % vq->vring.num; + +- uelem.id = elem->index; +- uelem.len = len; ++ stl_le_p(&uelem.id, elem->index); ++ stl_le_p(&uelem.len, len); + vring_used_write(dev, vq, &uelem, idx); + } + + static inline + void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val) + { +- vq->vring.used->idx = val; ++ stw_le_p(&vq->vring.used->idx, val); + vu_log_write(dev, + vq->vring.log_guest_addr + offsetof(struct vring_used, idx), + sizeof(vq->vring.used->idx)); +-- +2.27.0 
+ diff --git a/SOURCES/kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch b/SOURCES/kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch new file mode 100644 index 0000000..1217a6c --- /dev/null +++ b/SOURCES/kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch @@ -0,0 +1,83 @@ +From d9a63d12b5804eb172a040a16d7e725853c41a8c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:12 -0500 +Subject: [PATCH 12/18] linux-headers: Partial update against Linux 5.9-rc4 + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-9-thuth@redhat.com> +Patchwork-id: 99505 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 08/12] linux-headers: Partial update against Linux 5.9-rc4 +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Upstream-status: N/A + +This is based on upstream commit e6546342a830e520d14ef03aa95677611de0d90c +but only the two files have been included (there were too many conflicts +in the other unrelated files, so they have been dropped from this patch). + +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + linux-headers/asm-s390/kvm.h | 7 +++++-- + linux-headers/linux/kvm.h | 6 ++++++ + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 0138ccb0d89..f053b8304a8 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch { + #define KVM_SYNC_GSCB (1UL << 9) + #define KVM_SYNC_BPBC (1UL << 10) + #define KVM_SYNC_ETOKEN (1UL << 11) ++#define KVM_SYNC_DIAG318 (1UL << 12) + + #define KVM_SYNC_S390_VALID_FIELDS \ + (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ + KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ +- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) ++ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \ ++ KVM_SYNC_DIAG318) + + /* length and alignment of the sdnx as a power of two */ + #define SDNXC 8 +@@ -264,7 +266,8 @@ struct kvm_sync_regs { + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ + __u8 riccb[64]; /* runtime instrumentation controls block */ +- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ ++ __u64 diag318; /* diagnose 0x318 info */ ++ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 578cd97c0d9..6bba4ec136b 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -276,6 +276,7 @@ struct kvm_run { + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; ++ __u32 cpu; + } fail_entry; + /* KVM_EXIT_EXCEPTION */ + struct { +@@ -1011,6 +1012,11 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_S390_VCPU_RESETS 179 + #define KVM_CAP_S390_PROTECTED 180 + #define KVM_CAP_PPC_SECURE_GUEST 181 ++#define KVM_CAP_HALT_POLL 182 ++#define KVM_CAP_ASYNC_PF_INT 183 ++#define 
KVM_CAP_LAST_CPU 184 ++#define KVM_CAP_SMALLER_MAXPHYADDR 185 ++#define KVM_CAP_S390_DIAG318 186 + + #ifdef KVM_CAP_IRQ_ROUTING + +-- +2.27.0 + diff --git a/SOURCES/kvm-linux-headers-add-vfio-DMA-available-capability.patch b/SOURCES/kvm-linux-headers-add-vfio-DMA-available-capability.patch new file mode 100644 index 0000000..f62026d --- /dev/null +++ b/SOURCES/kvm-linux-headers-add-vfio-DMA-available-capability.patch @@ -0,0 +1,54 @@ +From b50c47e1a9fbe8876e231afbb5ed85945c8038da Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:40 -0500 +Subject: [PATCH 1/7] linux-headers: add vfio DMA available capability + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-2-cohuck@redhat.com> +Patchwork-id: 100674 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/7] linux-headers: add vfio DMA available capability +Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +UPSTREAM: RHEL only + +This is the part of 53ba2eee52bf ("linux-headers: update against +5.10-rc1") required for DMA limiting. + +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + linux-headers/linux/vfio.h | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 9e227348b30..f660bd7bace 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -751,6 +751,21 @@ struct vfio_iommu_type1_info_cap_iova_range { + struct vfio_iova_range iova_ranges[]; + }; + ++/* ++ * The DMA available capability allows to report the current number of ++ * simultaneously outstanding DMA mappings that are allowed. ++ * ++ * The structure below defines version 1 of this capability. ++ * ++ * avail: specifies the current number of outstanding DMA mappings allowed. 
++ */ ++#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 ++ ++struct vfio_iommu_type1_info_dma_avail { ++ struct vfio_info_cap_header header; ++ __u32 avail; ++}; ++ + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + + /** +-- +2.27.0 + diff --git a/SOURCES/kvm-memory-Add-IOMMUTLBEvent.patch b/SOURCES/kvm-memory-Add-IOMMUTLBEvent.patch new file mode 100644 index 0000000..5d73c97 --- /dev/null +++ b/SOURCES/kvm-memory-Add-IOMMUTLBEvent.patch @@ -0,0 +1,590 @@ +From 43a460bde62359c3fa2b1fc6c90d9e13ee7b9a6c Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:35 -0500 +Subject: [PATCH 11/17] memory: Add IOMMUTLBEvent +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-11-eperezma@redhat.com> +Patchwork-id: 100603 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 10/13] memory: Add IOMMUTLBEvent +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +This way we can tell between regular IOMMUTLBEntry (entry of IOMMU +hardware) and notifications. + +In the notifications, we set explicitly if it is a MAPs or an UNMAP, +instead of trusting in entry permissions to differentiate them. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Matthew Rosato +Acked-by: David Gibson +(cherry picked from commit 5039caf3c449c49e625d34e134463260cf8e00e0) + +Conflicts: + hw/s390x/s390-pci-inst.c: Context because of the lack of commit + ("37fa32de707 s390x/pci: Honor DMA limits set by vfio"). + hw/virtio/virtio-iommu.c: It does not exist in rhel. + +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/smmu-common.c | 13 +++--- + hw/arm/smmuv3.c | 13 +++--- + hw/i386/intel_iommu.c | 88 ++++++++++++++++++++++------------------ + hw/misc/tz-mpc.c | 32 ++++++++------- + hw/ppc/spapr_iommu.c | 15 +++---- + hw/s390x/s390-pci-inst.c | 27 +++++++----- + include/exec/memory.h | 27 ++++++------ + memory.c | 20 ++++----- + 8 files changed, 127 insertions(+), 108 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index dfabe381182..a519c97614a 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -464,14 +464,15 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) + /* Unmap the whole notifier's range */ + static void smmu_unmap_notifier_range(IOMMUNotifier *n) + { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + +- entry.target_as = &address_space_memory; +- entry.iova = n->start; +- entry.perm = IOMMU_NONE; +- entry.addr_mask = n->end - n->start; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &address_space_memory; ++ event.entry.iova = n->start; ++ event.entry.perm = IOMMU_NONE; ++ event.entry.addr_mask = n->end - n->start; + +- memory_region_notify_iommu_one(n, &entry); ++ memory_region_notify_iommu_one(n, &event); + } + + /* Unmap all notifiers attached to @mr */ +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index ef8a877c5d8..10b8393beeb 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -783,7 +783,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + uint8_t tg, uint64_t num_pages) + { + SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + uint8_t granule = tg; + + if (!tg) { +@@ -806,12 +806,13 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + granule = tt->granule_sz; + } + +- entry.target_as = &address_space_memory; +- entry.iova = iova; +- entry.addr_mask = num_pages * (1 << granule) - 1; +- entry.perm = IOMMU_NONE; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = 
&address_space_memory; ++ event.entry.iova = iova; ++ event.entry.addr_mask = num_pages * (1 << granule) - 1; ++ event.entry.perm = IOMMU_NONE; + +- memory_region_notify_iommu_one(n, &entry); ++ memory_region_notify_iommu_one(n, &event); + } + + /* invalidate an asid/iova range tuple in all mr's */ +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 463f107ad12..9fedbac82de 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -1016,7 +1016,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, + } + } + +-typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); ++typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); + + /** + * Constant information used during page walking +@@ -1037,11 +1037,12 @@ typedef struct { + uint16_t domain_id; + } vtd_page_walk_info; + +-static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) ++static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) + { + VTDAddressSpace *as = info->as; + vtd_page_walk_hook hook_fn = info->hook_fn; + void *private = info->private; ++ IOMMUTLBEntry *entry = &event->entry; + DMAMap target = { + .iova = entry->iova, + .size = entry->addr_mask, +@@ -1050,7 +1051,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + }; + DMAMap *mapped = iova_tree_find(as->iova_tree, &target); + +- if (entry->perm == IOMMU_NONE && !info->notify_unmap) { ++ if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) { + trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); + return 0; + } +@@ -1058,7 +1059,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + assert(hook_fn); + + /* Update local IOVA mapped ranges */ +- if (entry->perm) { ++ if (event->type == IOMMU_NOTIFIER_MAP) { + if (mapped) { + /* If it's exactly the same translation, skip */ + if (!memcmp(mapped, &target, sizeof(target))) { +@@ -1084,19 +1085,21 @@ static 
int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + int ret; + + /* Emulate an UNMAP */ ++ event->type = IOMMU_NOTIFIER_UNMAP; + entry->perm = IOMMU_NONE; + trace_vtd_page_walk_one(info->domain_id, + entry->iova, + entry->translated_addr, + entry->addr_mask, + entry->perm); +- ret = hook_fn(entry, private); ++ ret = hook_fn(event, private); + if (ret) { + return ret; + } + /* Drop any existing mapping */ + iova_tree_remove(as->iova_tree, &target); +- /* Recover the correct permission */ ++ /* Recover the correct type */ ++ event->type = IOMMU_NOTIFIER_MAP; + entry->perm = cache_perm; + } + } +@@ -1113,7 +1116,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + trace_vtd_page_walk_one(info->domain_id, entry->iova, + entry->translated_addr, entry->addr_mask, + entry->perm); +- return hook_fn(entry, private); ++ return hook_fn(event, private); + } + + /** +@@ -1134,7 +1137,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, + uint32_t offset; + uint64_t slpte; + uint64_t subpage_size, subpage_mask; +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + uint64_t iova = start; + uint64_t iova_next; + int ret = 0; +@@ -1188,13 +1191,15 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, + * + * In either case, we send an IOTLB notification down. + */ +- entry.target_as = &address_space_memory; +- entry.iova = iova & subpage_mask; +- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); +- entry.addr_mask = ~subpage_mask; ++ event.entry.target_as = &address_space_memory; ++ event.entry.iova = iova & subpage_mask; ++ event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); ++ event.entry.addr_mask = ~subpage_mask; + /* NOTE: this is only meaningful if entry_valid == true */ +- entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); +- ret = vtd_page_walk_one(&entry, info); ++ event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); ++ event.type = event.entry.perm ? 
IOMMU_NOTIFIER_MAP : ++ IOMMU_NOTIFIER_UNMAP; ++ ret = vtd_page_walk_one(&event, info); + } + + if (ret < 0) { +@@ -1373,10 +1378,10 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, + return 0; + } + +-static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry, ++static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, + void *private) + { +- memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry); ++ memory_region_notify_iommu(private, 0, *event); + return 0; + } + +@@ -1936,14 +1941,17 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, + * page tables. We just deliver the PSI down to + * invalidate caches. + */ +- IOMMUTLBEntry entry = { +- .target_as = &address_space_memory, +- .iova = addr, +- .translated_addr = 0, +- .addr_mask = size - 1, +- .perm = IOMMU_NONE, ++ IOMMUTLBEvent event = { ++ .type = IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .iova = addr, ++ .translated_addr = 0, ++ .addr_mask = size - 1, ++ .perm = IOMMU_NONE, ++ }, + }; +- memory_region_notify_iommu(&vtd_as->iommu, 0, entry); ++ memory_region_notify_iommu(&vtd_as->iommu, 0, event); + } + } + } +@@ -2355,7 +2363,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) + { + VTDAddressSpace *vtd_dev_as; +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + struct VTDBus *vtd_bus; + hwaddr addr; + uint64_t sz; +@@ -2403,12 +2411,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, + sz = VTD_PAGE_SIZE; + } + +- entry.target_as = &vtd_dev_as->as; +- entry.addr_mask = sz - 1; +- entry.iova = addr; +- entry.perm = IOMMU_NONE; +- entry.translated_addr = 0; +- memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry); ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &vtd_dev_as->as; ++ event.entry.addr_mask = sz - 1; ++ event.entry.iova = addr; ++ event.entry.perm = IOMMU_NONE; ++ event.entry.translated_addr = 0; ++ 
memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); + + done: + return true; +@@ -3419,19 +3428,20 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) + size = remain = end - start + 1; + + while (remain >= VTD_PAGE_SIZE) { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + uint64_t mask = get_naturally_aligned_size(start, remain, s->aw_bits); + + assert(mask); + +- entry.iova = start; +- entry.addr_mask = mask - 1; +- entry.target_as = &address_space_memory; +- entry.perm = IOMMU_NONE; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.iova = start; ++ event.entry.addr_mask = mask - 1; ++ event.entry.target_as = &address_space_memory; ++ event.entry.perm = IOMMU_NONE; + /* This field is meaningless for unmap */ +- entry.translated_addr = 0; ++ event.entry.translated_addr = 0; + +- memory_region_notify_iommu_one(n, &entry); ++ memory_region_notify_iommu_one(n, &event); + + start += mask; + remain -= mask; +@@ -3467,9 +3477,9 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) + vtd_switch_address_space_all(s); + } + +-static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) ++static int vtd_replay_hook(IOMMUTLBEvent *event, void *private) + { +- memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); ++ memory_region_notify_iommu_one(private, event); + return 0; + } + +diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c +index 49dd6050bd3..e2fbd1065d8 100644 +--- a/hw/misc/tz-mpc.c ++++ b/hw/misc/tz-mpc.c +@@ -82,8 +82,10 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, + /* Called when the LUT word at lutidx has changed from oldlut to newlut; + * must call the IOMMU notifiers for the changed blocks. 
+ */ +- IOMMUTLBEntry entry = { +- .addr_mask = s->blocksize - 1, ++ IOMMUTLBEvent event = { ++ .entry = { ++ .addr_mask = s->blocksize - 1, ++ } + }; + hwaddr addr = lutidx * s->blocksize * 32; + int i; +@@ -100,26 +102,28 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, + block_is_ns = newlut & (1 << i); + + trace_tz_mpc_iommu_notify(addr); +- entry.iova = addr; +- entry.translated_addr = addr; ++ event.entry.iova = addr; ++ event.entry.translated_addr = addr; + +- entry.perm = IOMMU_NONE; +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.perm = IOMMU_NONE; ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); + +- entry.perm = IOMMU_RW; ++ event.type = IOMMU_NOTIFIER_MAP; ++ event.entry.perm = IOMMU_RW; + if (block_is_ns) { +- entry.target_as = &s->blocked_io_as; ++ event.entry.target_as = &s->blocked_io_as; + } else { +- entry.target_as = &s->downstream_as; ++ event.entry.target_as = &s->downstream_as; + } +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); + if (block_is_ns) { +- entry.target_as = &s->downstream_as; ++ event.entry.target_as = &s->downstream_as; + } else { +- entry.target_as = &s->blocked_io_as; ++ event.entry.target_as = &s->blocked_io_as; + } +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); + } + } + +diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c +index 3d3bcc86496..9d3ec7e2c07 100644 +--- a/hw/ppc/spapr_iommu.c ++++ b/hw/ppc/spapr_iommu.c +@@ -445,7 +445,7 @@ static void spapr_tce_reset(DeviceState *dev) + static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + target_ulong tce) + { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + 
hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; + +@@ -457,12 +457,13 @@ static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + + tcet->table[index] = tce; + +- entry.target_as = &address_space_memory, +- entry.iova = (ioba - tcet->bus_offset) & page_mask; +- entry.translated_addr = tce & page_mask; +- entry.addr_mask = ~page_mask; +- entry.perm = spapr_tce_iommu_access_flags(tce); +- memory_region_notify_iommu(&tcet->iommu, 0, entry); ++ event.entry.target_as = &address_space_memory, ++ event.entry.iova = (ioba - tcet->bus_offset) & page_mask; ++ event.entry.translated_addr = tce & page_mask; ++ event.entry.addr_mask = ~page_mask; ++ event.entry.perm = spapr_tce_iommu_access_flags(tce); ++ event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; ++ memory_region_notify_iommu(&tcet->iommu, 0, event); + + return H_SUCCESS; + } +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 92c7e45df5f..27b189e6d75 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -575,15 +575,18 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) + { + S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); +- IOMMUTLBEntry notify = { +- .target_as = &address_space_memory, +- .iova = entry->iova, +- .translated_addr = entry->translated_addr, +- .perm = entry->perm, +- .addr_mask = ~PAGE_MASK, ++ IOMMUTLBEvent event = { ++ .type = entry->perm ? 
IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .iova = entry->iova, ++ .translated_addr = entry->translated_addr, ++ .perm = entry->perm, ++ .addr_mask = ~PAGE_MASK, ++ }, + }; + +- if (entry->perm == IOMMU_NONE) { ++ if (event.type == IOMMU_NOTIFIER_UNMAP) { + if (!cache) { + return; + } +@@ -595,9 +598,11 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) + return; + } + +- notify.perm = IOMMU_NONE; +- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); +- notify.perm = entry->perm; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.perm = IOMMU_NONE; ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ event.type = IOMMU_NOTIFIER_MAP; ++ event.entry.perm = entry->perm; + } + + cache = g_new(S390IOTLBEntry, 1); +@@ -608,7 +613,7 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) + g_hash_table_replace(iommu->iotlb, &cache->iova, cache); + } + +- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + } + + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) +diff --git a/include/exec/memory.h b/include/exec/memory.h +index b6466ab6d57..80e36077cdb 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -106,6 +106,11 @@ struct IOMMUNotifier { + }; + typedef struct IOMMUNotifier IOMMUNotifier; + ++typedef struct IOMMUTLBEvent { ++ IOMMUNotifierFlag type; ++ IOMMUTLBEntry entry; ++} IOMMUTLBEvent; ++ + /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ + #define RAM_PREALLOC (1 << 0) + +@@ -1047,24 +1052,18 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr); + /** + * memory_region_notify_iommu: notify a change in an IOMMU translation entry. + * +- * The notification type will be decided by entry.perm bits: +- * +- * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE. 
+- * - For MAP (newly added entry) notifies: set entry.perm to the +- * permission of the page (which is definitely !IOMMU_NONE). +- * + * Note: for any IOMMU implementation, an in-place mapping change + * should be notified with an UNMAP followed by a MAP. + * + * @iommu_mr: the memory region that was changed + * @iommu_idx: the IOMMU index for the translation table which has changed +- * @entry: the new entry in the IOMMU translation table. The entry +- * replaces all old entries for the same virtual I/O address range. +- * Deleted entries have .@perm == 0. ++ * @event: TLB event with the new entry in the IOMMU translation table. ++ * The entry replaces all old entries for the same virtual I/O address ++ * range. + */ + void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + int iommu_idx, +- IOMMUTLBEntry entry); ++ IOMMUTLBEvent event); + + /** + * memory_region_notify_iommu_one: notify a change in an IOMMU translation +@@ -1074,12 +1073,12 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + * notifies a specific notifier, not all of them. + * + * @notifier: the notifier to be notified +- * @entry: the new entry in the IOMMU translation table. The entry +- * replaces all old entries for the same virtual I/O address range. +- * Deleted entries have .@perm == 0. ++ * @event: TLB event with the new entry in the IOMMU translation table. ++ * The entry replaces all old entries for the same virtual I/O address ++ * range. 
+ */ + void memory_region_notify_iommu_one(IOMMUNotifier *notifier, +- IOMMUTLBEntry *entry); ++ IOMMUTLBEvent *event); + + /** + * memory_region_register_iommu_notifier: register a notifier for changes to +diff --git a/memory.c b/memory.c +index 43bd3359bf8..3bd99b8ac4a 100644 +--- a/memory.c ++++ b/memory.c +@@ -1912,11 +1912,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + } + + void memory_region_notify_iommu_one(IOMMUNotifier *notifier, +- IOMMUTLBEntry *entry) ++ IOMMUTLBEvent *event) + { +- IOMMUNotifierFlag request_flags; ++ IOMMUTLBEntry *entry = &event->entry; + hwaddr entry_end = entry->iova + entry->addr_mask; + ++ if (event->type == IOMMU_NOTIFIER_UNMAP) { ++ assert(entry->perm == IOMMU_NONE); ++ } ++ + /* + * Skip the notification if the notification does not overlap + * with registered range. +@@ -1927,20 +1931,14 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + + assert(entry->iova >= notifier->start && entry_end <= notifier->end); + +- if (entry->perm & IOMMU_RW) { +- request_flags = IOMMU_NOTIFIER_MAP; +- } else { +- request_flags = IOMMU_NOTIFIER_UNMAP; +- } +- +- if (notifier->notifier_flags & request_flags) { ++ if (event->type & notifier->notifier_flags) { + notifier->notify(notifier, entry); + } + } + + void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + int iommu_idx, +- IOMMUTLBEntry entry) ++ IOMMUTLBEvent event) + { + IOMMUNotifier *iommu_notifier; + +@@ -1948,7 +1946,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { + if (iommu_notifier->iommu_idx == iommu_idx) { +- memory_region_notify_iommu_one(iommu_notifier, &entry); ++ memory_region_notify_iommu_one(iommu_notifier, &event); + } + } + } +-- +2.27.0 + diff --git a/SOURCES/kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch b/SOURCES/kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch new file mode 100644 index 0000000..89eb9c9 --- /dev/null +++ 
b/SOURCES/kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch @@ -0,0 +1,89 @@ +From f0fa537af2e1e5f827eeb74dc5b3e12776917a67 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:36 -0500 +Subject: [PATCH 12/17] memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP + IOMMUTLBNotificationType +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-12-eperezma@redhat.com> +Patchwork-id: 100604 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 11/13] memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP IOMMUTLBNotificationType +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +This allows us to differentiate between regular IOMMU map/unmap events +and DEVIOTLB unmap. Doing so, notifiers that only need device IOTLB +invalidations will not receive regular IOMMU unmappings. + +Adapt intel and vhost to use it. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit b68ba1ca57677acf870d5ab10579e6105c1f5338) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/intel_iommu.c | 2 +- + hw/virtio/vhost.c | 2 +- + include/exec/memory.h | 7 ++++++- + 3 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 9fedbac82de..3640bc2ed15 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -2411,7 +2411,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, + sz = VTD_PAGE_SIZE; + } + +- event.type = IOMMU_NOTIFIER_UNMAP; ++ event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP; + event.entry.target_as = &vtd_dev_as->as; + event.entry.addr_mask = sz - 1; + event.entry.iova = addr; +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 9182a00495e..78a5df3b379 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -704,7 +704,7 @@ static void vhost_iommu_region_add(MemoryListener *listener, + iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, + MEMTXATTRS_UNSPECIFIED); + iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, +- IOMMU_NOTIFIER_UNMAP, ++ IOMMU_NOTIFIER_DEVIOTLB_UNMAP, + section->offset_within_region, + int128_get64(end), + iommu_idx); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 80e36077cdb..403dc0c0572 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -87,9 +87,14 @@ typedef enum { + IOMMU_NOTIFIER_UNMAP = 0x1, + /* Notify entry changes (newly created entries) */ + IOMMU_NOTIFIER_MAP = 0x2, ++ /* Notify changes on device IOTLB entries */ ++ IOMMU_NOTIFIER_DEVIOTLB_UNMAP = 0x04, + } IOMMUNotifierFlag; + +-#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) ++#define IOMMU_NOTIFIER_IOTLB_EVENTS (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) ++#define IOMMU_NOTIFIER_DEVIOTLB_EVENTS IOMMU_NOTIFIER_DEVIOTLB_UNMAP ++#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_IOTLB_EVENTS | \ ++ IOMMU_NOTIFIER_DEVIOTLB_EVENTS) + + struct IOMMUNotifier; + typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, +-- +2.27.0 + diff --git 
a/SOURCES/kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch b/SOURCES/kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch new file mode 100644 index 0000000..8921c14 --- /dev/null +++ b/SOURCES/kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch @@ -0,0 +1,146 @@ +From e876535fd5ed10abf0dbeb55ec7098664412068e Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:34 -0500 +Subject: [PATCH 10/17] memory: Rename memory_region_notify_one to + memory_region_notify_iommu_one +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-10-eperezma@redhat.com> +Patchwork-id: 100602 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 09/13] memory: Rename memory_region_notify_one to memory_region_notify_iommu_one +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +Previous name didn't reflect the iommu operation. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: David Gibson +Reviewed-by: Juan Quintela +Reviewed-by: Eric Auger +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3b5ebf8532afdc1518bd8b0961ed802bc3f5f07c) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/smmu-common.c | 2 +- + hw/arm/smmuv3.c | 2 +- + hw/i386/intel_iommu.c | 4 ++-- + include/exec/memory.h | 6 +++--- + memory.c | 6 +++--- + 5 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 9780404f002..dfabe381182 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -471,7 +471,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) + entry.perm = IOMMU_NONE; + entry.addr_mask = n->end - n->start; + +- memory_region_notify_one(n, &entry); ++ memory_region_notify_iommu_one(n, &entry); + } + + /* Unmap all notifiers attached to @mr */ +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index a418fab2aa6..ef8a877c5d8 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -811,7 +811,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + entry.addr_mask = num_pages * (1 << granule) - 1; + entry.perm = IOMMU_NONE; + +- memory_region_notify_one(n, &entry); ++ memory_region_notify_iommu_one(n, &entry); + } + + /* invalidate an asid/iova range tuple in all mr's */ +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 43c94b993b4..463f107ad12 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3431,7 +3431,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) + /* This field is meaningless for unmap */ + entry.translated_addr = 0; + +- memory_region_notify_one(n, &entry); ++ memory_region_notify_iommu_one(n, &entry); + + start += mask; + remain -= mask; +@@ -3469,7 +3469,7 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) + + static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) + { +- memory_region_notify_one((IOMMUNotifier *)private, entry); ++ memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); + return 0; + } + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index e499dc215b3..b6466ab6d57 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h 
+@@ -226,7 +226,7 @@ enum IOMMUMemoryRegionAttr { + * The IOMMU implementation must use the IOMMU notifier infrastructure + * to report whenever mappings are changed, by calling + * memory_region_notify_iommu() (or, if necessary, by calling +- * memory_region_notify_one() for each registered notifier). ++ * memory_region_notify_iommu_one() for each registered notifier). + * + * Conceptually an IOMMU provides a mapping from input address + * to an output TLB entry. If the IOMMU is aware of memory transaction +@@ -1067,7 +1067,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + IOMMUTLBEntry entry); + + /** +- * memory_region_notify_one: notify a change in an IOMMU translation ++ * memory_region_notify_iommu_one: notify a change in an IOMMU translation + * entry to a single notifier + * + * This works just like memory_region_notify_iommu(), but it only +@@ -1078,7 +1078,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + * replaces all old entries for the same virtual I/O address range. + * Deleted entries have .@perm == 0. 
+ */ +-void memory_region_notify_one(IOMMUNotifier *notifier, ++void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry); + + /** +diff --git a/memory.c b/memory.c +index 06484c2bff2..43bd3359bf8 100644 +--- a/memory.c ++++ b/memory.c +@@ -1911,8 +1911,8 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + memory_region_update_iommu_notify_flags(iommu_mr, NULL); + } + +-void memory_region_notify_one(IOMMUNotifier *notifier, +- IOMMUTLBEntry *entry) ++void memory_region_notify_iommu_one(IOMMUNotifier *notifier, ++ IOMMUTLBEntry *entry) + { + IOMMUNotifierFlag request_flags; + hwaddr entry_end = entry->iova + entry->addr_mask; +@@ -1948,7 +1948,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { + if (iommu_notifier->iommu_idx == iommu_idx) { +- memory_region_notify_one(iommu_notifier, &entry); ++ memory_region_notify_iommu_one(iommu_notifier, &entry); + } + } + } +-- +2.27.0 + diff --git a/SOURCES/kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch b/SOURCES/kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch new file mode 100644 index 0000000..de56901 --- /dev/null +++ b/SOURCES/kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch @@ -0,0 +1,70 @@ +From 8c5154729effda3f762bfb8224f9c61dab8b2986 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 12 Jan 2021 14:36:38 -0500 +Subject: [PATCH 14/17] memory: Skip bad range assertion if notifier is + DEVIOTLB_UNMAP type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210112143638.374060-14-eperezma@redhat.com> +Patchwork-id: 100606 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 13/13] memory: Skip bad range assertion if notifier is DEVIOTLB_UNMAP type +Bugzilla: 1843852 +RH-Acked-by: Xiao Wang +RH-Acked-by: Peter Xu +RH-Acked-by: Auger Eric + +Device IOTLB invalidations can unmap arbitrary ranges, 
either outside of +the memory region or even [0, ~0ULL] for all the space. The assertion +could be hit by a guest, and rhel7 guest effectively hit it. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-6-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1804857f19f612f6907832e35599cdb51d4ec764) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + memory.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/memory.c b/memory.c +index 3bd99b8ac4a..5a4a80842d7 100644 +--- a/memory.c ++++ b/memory.c +@@ -1916,6 +1916,7 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + { + IOMMUTLBEntry *entry = &event->entry; + hwaddr entry_end = entry->iova + entry->addr_mask; ++ IOMMUTLBEntry tmp = *entry; + + if (event->type == IOMMU_NOTIFIER_UNMAP) { + assert(entry->perm == IOMMU_NONE); +@@ -1929,10 +1930,16 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + return; + } + +- assert(entry->iova >= notifier->start && entry_end <= notifier->end); ++ if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { ++ /* Crop (iova, addr_mask) to range */ ++ tmp.iova = MAX(tmp.iova, notifier->start); ++ tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova; ++ } else { ++ assert(entry->iova >= notifier->start && entry_end <= notifier->end); ++ } + + if (event->type & notifier->notifier_flags) { +- notifier->notify(notifier, entry); ++ notifier->notify(notifier, &tmp); + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-memory-clamp-cached-translation-in-case-it-points-to.patch b/SOURCES/kvm-memory-clamp-cached-translation-in-case-it-points-to.patch new file mode 100644 index 0000000..8b8f67a --- /dev/null +++ b/SOURCES/kvm-memory-clamp-cached-translation-in-case-it-points-to.patch @@ -0,0 +1,87 @@ +From 354946f1e5fee0a69282bdf284c969b03a78a53e Mon Sep
17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Jan 2021 00:42:23 -0500 +Subject: [PATCH 15/17] memory: clamp cached translation in case it points to + an MMIO region + +RH-Author: Jon Maloy +Message-id: <20210113004223.871394-2-jmaloy@redhat.com> +Patchwork-id: 100618 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] memory: clamp cached translation in case it points to an MMIO region +Bugzilla: 1904393 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Thomas Huth + +From: Paolo Bonzini + +In using the address_space_translate_internal API, address_space_cache_init +forgot one piece of advice that can be found in the code for +address_space_translate_internal: + + /* MMIO registers can be expected to perform full-width accesses based only + * on their address, without considering adjacent registers that could + * decode to completely different MemoryRegions. When such registers + * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO + * regions overlap wildly. For this reason we cannot clamp the accesses + * here. + * + * If the length is small (as is the case for address_space_ldl/stl), + * everything works fine. If the incoming length is large, however, + * the caller really has to do the clamping through memory_access_size. + */ + +address_space_cache_init is exactly one such case where "the incoming length +is large", therefore we need to clamp the resulting length---not to +memory_access_size though, since we are not doing an access yet, but to +the size of the resulting section. This ensures that subsequent accesses +to the cached MemoryRegionSection will be in range. + +With this patch, the enclosed testcase notices that the used ring does +not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used" +error. + +Signed-off-by: Paolo Bonzini + +(cherry picked from 4bfb024bc76973d40a359476dc0291f46e435442) +- Manually applied to file exec.c, where the code to correct + is located in this version. 
+- Skipped the fuzzing test part, which is hard to apply on this code. +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. de Paula +--- + exec.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/exec.c b/exec.c +index ffdb5185353..09ed0cfc756 100644 +--- a/exec.c ++++ b/exec.c +@@ -3620,6 +3620,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpaceDispatch *d; + hwaddr l; + MemoryRegion *mr; ++ Int128 diff; + + assert(len > 0); + +@@ -3628,6 +3629,15 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, + d = flatview_to_dispatch(cache->fv); + cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); + ++ /* ++ * cache->xlat is now relative to cache->mrs.mr, not to the section itself. ++ * Take that into account to compute how many bytes are there between ++ * cache->xlat and the end of the section. ++ */ ++ diff = int128_sub(cache->mrs.size, ++ int128_make64(cache->xlat - cache->mrs.offset_within_region)); ++ l = int128_get64(int128_min(diff, int128_make64(l))); ++ + mr = cache->mrs.mr; + memory_region_ref(mr); + if (memory_access_is_direct(mr, is_write)) { +-- +2.27.0 + diff --git a/SOURCES/kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch b/SOURCES/kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch new file mode 100644 index 0000000..eb4e9af --- /dev/null +++ b/SOURCES/kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch @@ -0,0 +1,255 @@ +From 67878e1306f9ea6ccd30437327147c46de196a36 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:13 -0500 +Subject: [PATCH 13/18] misc: Replace zero-length arrays with flexible array + member (manual) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-10-thuth@redhat.com> +Patchwork-id: 99506 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 09/12] misc: Replace zero-length arrays with flexible 
array member (manual) +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Philippe Mathieu-Daudé + +Description copied from Linux kernel commit from Gustavo A. R. Silva +(see [3]): + +--v-- description start --v-- + + The current codebase makes use of the zero-length array language + extension to the C90 standard, but the preferred mechanism to + declare variable-length types such as these ones is a flexible + array member [1], introduced in C99: + + struct foo { + int stuff; + struct boo array[]; + }; + + By making use of the mechanism above, we will get a compiler + warning in case the flexible array does not occur last in the + structure, which will help us prevent some kind of undefined + behavior bugs from being inadvertently introduced [2] to the + Linux codebase from now on. + +--^-- description end --^-- + +Do the similar housekeeping in the QEMU codebase (which uses +C99 since commit 7be41675f7cb). + +All these instances of code were found with the help of the +following command (then manual analysis, without modifying +structures only having a single flexible array member, such as +QEDTable in block/qed.h): + + git grep -F '[0];' + +[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html +[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=76497732932f +[3] https://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git/commit/?id=17642a2fbd2c1 + +Inspired-by: Gustavo A. R. Silva +Reviewed-by: David Hildenbrand +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 880a7817c1a82a93d3f83dfb25dce1f0db629c66) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L.
de Paula +--- + block/vmdk.c | 2 +- + docs/interop/vhost-user.rst | 4 ++-- + hw/char/sclpconsole-lm.c | 2 +- + hw/char/sclpconsole.c | 2 +- + hw/s390x/virtio-ccw.c | 2 +- + include/hw/acpi/acpi-defs.h | 4 ++-- + include/hw/boards.h | 2 +- + include/hw/s390x/event-facility.h | 2 +- + include/hw/s390x/sclp.h | 8 ++++---- + target/s390x/ioinst.c | 2 +- + 10 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/block/vmdk.c b/block/vmdk.c +index 1bd39917290..8ec18f35a53 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -187,7 +187,7 @@ typedef struct VmdkMetaData { + typedef struct VmdkGrainMarker { + uint64_t lba; + uint32_t size; +- uint8_t data[0]; ++ uint8_t data[]; + } QEMU_PACKED VmdkGrainMarker; + + enum { +diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst +index 7827b710aa0..71b20ce83dd 100644 +--- a/docs/interop/vhost-user.rst ++++ b/docs/interop/vhost-user.rst +@@ -563,7 +563,7 @@ For split virtqueue, queue region can be implemented as: + uint16_t used_idx; + + /* Used to track the state of each descriptor in descriptor table */ +- DescStateSplit desc[0]; ++ DescStateSplit desc[]; + } QueueRegionSplit; + + To track inflight I/O, the queue region should be processed as follows: +@@ -685,7 +685,7 @@ For packed virtqueue, queue region can be implemented as: + uint8_t padding[7]; + + /* Used to track the state of each descriptor fetched from descriptor ring */ +- DescStatePacked desc[0]; ++ DescStatePacked desc[]; + } QueueRegionPacked; + + To track inflight I/O, the queue region should be processed as follows: +diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c +index 392606259d5..a9a6f2b204c 100644 +--- a/hw/char/sclpconsole-lm.c ++++ b/hw/char/sclpconsole-lm.c +@@ -31,7 +31,7 @@ + typedef struct OprtnsCommand { + EventBufferHeader header; + MDMSU message_unit; +- char data[0]; ++ char data[]; + } QEMU_PACKED OprtnsCommand; + + /* max size for line-mode data in 4K SCCB page */ +diff --git a/hw/char/sclpconsole.c 
b/hw/char/sclpconsole.c +index da126f0133f..55697130a0a 100644 +--- a/hw/char/sclpconsole.c ++++ b/hw/char/sclpconsole.c +@@ -25,7 +25,7 @@ + + typedef struct ASCIIConsoleData { + EventBufferHeader ebh; +- char data[0]; ++ char data[]; + } QEMU_PACKED ASCIIConsoleData; + + /* max size for ASCII data in 4K SCCB page */ +diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c +index 6580ce5907d..aa2c75a49c6 100644 +--- a/hw/s390x/virtio-ccw.c ++++ b/hw/s390x/virtio-ccw.c +@@ -193,7 +193,7 @@ typedef struct VirtioThinintInfo { + typedef struct VirtioRevInfo { + uint16_t revision; + uint16_t length; +- uint8_t data[0]; ++ uint8_t data[]; + } QEMU_PACKED VirtioRevInfo; + + /* Specify where the virtqueues for the subchannel are in guest memory. */ +diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h +index 57a3f58b0c9..b80188b430f 100644 +--- a/include/hw/acpi/acpi-defs.h ++++ b/include/hw/acpi/acpi-defs.h +@@ -152,7 +152,7 @@ typedef struct AcpiSerialPortConsoleRedirection + */ + struct AcpiRsdtDescriptorRev1 { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ +- uint32_t table_offset_entry[0]; /* Array of pointers to other */ ++ uint32_t table_offset_entry[]; /* Array of pointers to other */ + /* ACPI tables */ + } QEMU_PACKED; + typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; +@@ -162,7 +162,7 @@ typedef struct AcpiRsdtDescriptorRev1 AcpiRsdtDescriptorRev1; + */ + struct AcpiXsdtDescriptorRev2 { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ +- uint64_t table_offset_entry[0]; /* Array of pointers to other */ ++ uint64_t table_offset_entry[]; /* Array of pointers to other */ + /* ACPI tables */ + } QEMU_PACKED; + typedef struct AcpiXsdtDescriptorRev2 AcpiXsdtDescriptorRev2; +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 2920bdef5b4..a5e92f6c373 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -101,7 +101,7 @@ typedef struct CPUArchId { + */ + typedef struct { + int len; +- CPUArchId 
cpus[0]; ++ CPUArchId cpus[]; + } CPUArchIdList; + + /** +diff --git a/include/hw/s390x/event-facility.h b/include/hw/s390x/event-facility.h +index bdc32a3c091..700a610f33c 100644 +--- a/include/hw/s390x/event-facility.h ++++ b/include/hw/s390x/event-facility.h +@@ -122,7 +122,7 @@ typedef struct MDBO { + + typedef struct MDB { + MdbHeader header; +- MDBO mdbo[0]; ++ MDBO mdbo[]; + } QEMU_PACKED MDB; + + typedef struct SclpMsg { +diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h +index df2fa4169b0..62e2aa1d9f1 100644 +--- a/include/hw/s390x/sclp.h ++++ b/include/hw/s390x/sclp.h +@@ -133,7 +133,7 @@ typedef struct ReadInfo { + uint16_t highest_cpu; + uint8_t _reserved5[124 - 122]; /* 122-123 */ + uint32_t hmfai; +- struct CPUEntry entries[0]; ++ struct CPUEntry entries[]; + } QEMU_PACKED ReadInfo; + + typedef struct ReadCpuInfo { +@@ -143,7 +143,7 @@ typedef struct ReadCpuInfo { + uint16_t nr_standby; /* 12-13 */ + uint16_t offset_standby; /* 14-15 */ + uint8_t reserved0[24-16]; /* 16-23 */ +- struct CPUEntry entries[0]; ++ struct CPUEntry entries[]; + } QEMU_PACKED ReadCpuInfo; + + typedef struct ReadStorageElementInfo { +@@ -152,7 +152,7 @@ typedef struct ReadStorageElementInfo { + uint16_t assigned; + uint16_t standby; + uint8_t _reserved0[16 - 14]; /* 14-15 */ +- uint32_t entries[0]; ++ uint32_t entries[]; + } QEMU_PACKED ReadStorageElementInfo; + + typedef struct AttachStorageElement { +@@ -160,7 +160,7 @@ typedef struct AttachStorageElement { + uint8_t _reserved0[10 - 8]; /* 8-9 */ + uint16_t assigned; + uint8_t _reserved1[16 - 12]; /* 12-15 */ +- uint32_t entries[0]; ++ uint32_t entries[]; + } QEMU_PACKED AttachStorageElement; + + typedef struct AssignStorage { +diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c +index b6be300cc48..a412926d278 100644 +--- a/target/s390x/ioinst.c ++++ b/target/s390x/ioinst.c +@@ -387,7 +387,7 @@ typedef struct ChscResp { + uint16_t len; + uint16_t code; + uint32_t param; +- char data[0]; ++ char data[]; + 
} QEMU_PACKED ChscResp; + + #define CHSC_MIN_RESP_LEN 0x0008 +-- +2.27.0 + diff --git a/SOURCES/kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch b/SOURCES/kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch new file mode 100644 index 0000000..8334b7b --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch @@ -0,0 +1,87 @@ +From c6f62870f27ece45e944d1818f6aa04b3e024959 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 10 Dec 2020 08:32:41 -0500 +Subject: [PATCH 5/5] pc-bios: s390x: Clear out leftover S390EP string + +RH-Author: Thomas Huth +Message-id: <20201210083241.173509-5-thuth@redhat.com> +Patchwork-id: 100369 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/4] pc-bios: s390x: Clear out leftover S390EP string +Bugzilla: 1903135 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann +RH-Acked-by: David Hildenbrand + +From: Eric Farman + +A Linux binary will have the string "S390EP" at address 0x10008, +which is important in getting the guest up off the ground. In the +case of a reboot (specifically chreipl going to a new device), +we should defer to the PSW at address zero for the new config, +which will re-write "S390EP" from the new image. + +Let's clear it out at this point so that a reipl to, say, a DASD +passthrough device drives the IPL path from scratch without disrupting +the order of operations for other boots. + +Rather than hardcoding the address of this magic (again), let's +define it somewhere so that the two users are visibly related. + +Signed-off-by: Eric Farman +Message-Id: <20201120160117.59366-3-farman@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 3d6519968bb10260fc724c491fb4275f7c0b78ac) +Signed-off-by: Danilo C. L. 
de Paula +--- + pc-bios/s390-ccw/jump2ipl.c | 2 +- + pc-bios/s390-ccw/main.c | 6 ++++++ + pc-bios/s390-ccw/s390-arch.h | 3 +++ + 3 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c +index 767012bf0c9..6c6823b5db8 100644 +--- a/pc-bios/s390-ccw/jump2ipl.c ++++ b/pc-bios/s390-ccw/jump2ipl.c +@@ -78,7 +78,7 @@ void jump_to_low_kernel(void) + * kernel start address (when jumping to the PSW-at-zero address instead, + * the kernel startup code fails when we booted from a network device). + */ +- if (!memcmp((char *)0x10008, "S390EP", 6)) { ++ if (!memcmp((char *)S390EP, "S390EP", 6)) { + jump_to_IPL_code(KERN_IMAGE_START); + } + +diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c +index e3a1a3053d0..c04b910082b 100644 +--- a/pc-bios/s390-ccw/main.c ++++ b/pc-bios/s390-ccw/main.c +@@ -185,6 +185,12 @@ static void boot_setup(void) + memcpy(lpmsg + 10, loadparm_str, 8); + sclp_print(lpmsg); + ++ /* ++ * Clear out any potential S390EP magic (see jump_to_low_kernel()), ++ * so we don't taint our decision-making process during a reboot. 
++ */ ++ memset((char *)S390EP, 0, 6); ++ + have_iplb = store_iplb(&iplb); + } + +diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h +index 6da44d4436c..a741488aaa1 100644 +--- a/pc-bios/s390-ccw/s390-arch.h ++++ b/pc-bios/s390-ccw/s390-arch.h +@@ -95,6 +95,9 @@ typedef struct LowCore { + + extern LowCore *lowcore; + ++/* Location of "S390EP" in a Linux binary (see arch/s390/boot/head.S) */ ++#define S390EP 0x10008 ++ + static inline void set_prefix(uint32_t address) + { + asm volatile("spx %0" : : "m" (address) : "memory"); +-- +2.27.0 + diff --git a/SOURCES/kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch b/SOURCES/kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch new file mode 100644 index 0000000..9d09be3 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch @@ -0,0 +1,63 @@ +From 6b19062226ecebf63d2d0b0ff05b5bcfa7a05818 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 10 Dec 2020 08:32:40 -0500 +Subject: [PATCH 4/5] pc-bios: s390x: Ensure Read IPL memory is clean + +RH-Author: Thomas Huth +Message-id: <20201210083241.173509-4-thuth@redhat.com> +Patchwork-id: 100372 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/4] pc-bios: s390x: Ensure Read IPL memory is clean +Bugzilla: 1903135 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann +RH-Acked-by: David Hildenbrand + +From: Eric Farman + +If, for example, we boot off a virtio device and chreipl to a vfio-ccw +device, the space at lowcore will be non-zero. We build a Read IPL CCW +at address zero, but it will have leftover PSW data that will conflict +with the Format-0 CCW being generated: + +0x0: 00080000 80010000 + ------ Ccw0.cda + -- Ccw0.chainData + -- Reserved bits + +The data address will be overwritten with the correct value (0x0), but +the apparent data chain bit will cause subsequent memory to be used as +the target of the data store, which may not be where we expect (0x0). 
+ +Clear out this space when we boot from DASD, so that we know it exists +exactly as we expect. + +Signed-off-by: Eric Farman +Reviewed-by: Jason J. Herne +Reviewed-by: Janosch Frank +Acked-by: Christian Borntraeger +Acked-by: Cornelia Huck +Message-Id: <20201120160117.59366-2-farman@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit d8e5bbdd0d6fa8d9b5ac15de62c87105d92ff558) +Signed-off-by: Danilo C. L. de Paula +--- + pc-bios/s390-ccw/dasd-ipl.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/pc-bios/s390-ccw/dasd-ipl.c b/pc-bios/s390-ccw/dasd-ipl.c +index 0fc879bb8e8..71cbae2f16e 100644 +--- a/pc-bios/s390-ccw/dasd-ipl.c ++++ b/pc-bios/s390-ccw/dasd-ipl.c +@@ -100,6 +100,9 @@ static void make_readipl(void) + { + Ccw0 *ccwIplRead = (Ccw0 *)0x00; + ++ /* Clear out any existing data */ ++ memset(ccwIplRead, 0, sizeof(Ccw0)); ++ + /* Create Read IPL ccw at address 0 */ + ccwIplRead->cmd_code = CCW_CMD_READ_IPL; + ccwIplRead->cda = 0x00; /* Read into address 0x00 in main memory */ +-- +2.27.0 + diff --git a/SOURCES/kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch b/SOURCES/kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch new file mode 100644 index 0000000..8ba4530 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch @@ -0,0 +1,45 @@ +From 494ce6ed658a806af36d4f50600e44740a446011 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 10 Dec 2020 08:32:38 -0500 +Subject: [PATCH 2/5] pc-bios: s390x: Rename PSW_MASK_ZMODE to PSW_MASK_64 + +RH-Author: Thomas Huth +Message-id: <20201210083241.173509-2-thuth@redhat.com> +Patchwork-id: 100370 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/4] pc-bios: s390x: Rename PSW_MASK_ZMODE to PSW_MASK_64 +Bugzilla: 1903135 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +This constant enables 64 bit addressing, not the ESAME architecture, +so it shouldn't be named ZMODE. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Message-Id: <20200624075226.92728-7-frankja@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit b88faa1c899db2fae8b5b168aeb6c47bef090f27) +Signed-off-by: Danilo C. L. de Paula +--- + pc-bios/s390-ccw/s390-arch.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h +index 5f36361c022..73852029d4e 100644 +--- a/pc-bios/s390-ccw/s390-arch.h ++++ b/pc-bios/s390-ccw/s390-arch.h +@@ -29,7 +29,7 @@ _Static_assert(sizeof(struct PSWLegacy) == 8, "PSWLegacy size incorrect"); + #define PSW_MASK_WAIT 0x0002000000000000ULL + #define PSW_MASK_EAMODE 0x0000000100000000ULL + #define PSW_MASK_BAMODE 0x0000000080000000ULL +-#define PSW_MASK_ZMODE (PSW_MASK_EAMODE | PSW_MASK_BAMODE) ++#define PSW_MASK_64 (PSW_MASK_EAMODE | PSW_MASK_BAMODE) + + /* Low core mapping */ + typedef struct LowCore { +-- +2.27.0 + diff --git a/SOURCES/kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch b/SOURCES/kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch new file mode 100644 index 0000000..576447d --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch @@ -0,0 +1,89 @@ +From 35891c9334058c02f3ee83eee1a986802387c18b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 10 Dec 2020 08:32:39 -0500 +Subject: [PATCH 3/5] pc-bios: s390x: Use PSW masks where possible and + introduce PSW_MASK_SHORT_ADDR + +RH-Author: Thomas Huth +Message-id: <20201210083241.173509-3-thuth@redhat.com> +Patchwork-id: 100371 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/4] pc-bios: s390x: Use PSW masks where possible and introduce PSW_MASK_SHORT_ADDR +Bugzilla: 1903135 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Let's move some of the PSW mask defines into s390-arch.h and use them +in jump2ipl.c. 
Also let's introduce a new constant for the address +mask of 8 byte (short) PSWs. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Thomas Huth +Message-Id: <20200624075226.92728-8-frankja@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit fe75c657b8ee962da79f5d3518b139e26dc69c24) +Signed-off-by: Danilo C. L. de Paula +--- + pc-bios/s390-ccw/jump2ipl.c | 10 ++++------ + pc-bios/s390-ccw/s390-arch.h | 2 ++ + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c +index 4eba2510b04..767012bf0c9 100644 +--- a/pc-bios/s390-ccw/jump2ipl.c ++++ b/pc-bios/s390-ccw/jump2ipl.c +@@ -8,12 +8,10 @@ + + #include "libc.h" + #include "s390-ccw.h" ++#include "s390-arch.h" + + #define KERN_IMAGE_START 0x010000UL +-#define PSW_MASK_64 0x0000000100000000ULL +-#define PSW_MASK_32 0x0000000080000000ULL +-#define PSW_MASK_SHORTPSW 0x0008000000000000ULL +-#define RESET_PSW_MASK (PSW_MASK_SHORTPSW | PSW_MASK_32 | PSW_MASK_64) ++#define RESET_PSW_MASK (PSW_MASK_SHORTPSW | PSW_MASK_64) + + typedef struct ResetInfo { + uint64_t ipl_psw; +@@ -54,7 +52,7 @@ void jump_to_IPL_code(uint64_t address) + + current->ipl_psw = (uint64_t) &jump_to_IPL_2; + current->ipl_psw |= RESET_PSW_MASK; +- current->ipl_continue = address & 0x7fffffff; ++ current->ipl_continue = address & PSW_MASK_SHORT_ADDR; + + debug_print_int("set IPL addr to", current->ipl_continue); + +@@ -86,7 +84,7 @@ void jump_to_low_kernel(void) + + /* Trying to get PSW at zero address */ + if (*((uint64_t *)0) & RESET_PSW_MASK) { +- jump_to_IPL_code((*((uint64_t *)0)) & 0x7fffffff); ++ jump_to_IPL_code((*((uint64_t *)0)) & PSW_MASK_SHORT_ADDR); + } + + /* No other option left, so use the Linux kernel start address */ +diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h +index 73852029d4e..6da44d4436c 100644 +--- a/pc-bios/s390-ccw/s390-arch.h ++++ b/pc-bios/s390-ccw/s390-arch.h +@@ -26,9 +26,11 @@ 
_Static_assert(sizeof(struct PSWLegacy) == 8, "PSWLegacy size incorrect"); + + /* s390 psw bit masks */ + #define PSW_MASK_IOINT 0x0200000000000000ULL ++#define PSW_MASK_SHORTPSW 0x0008000000000000ULL + #define PSW_MASK_WAIT 0x0002000000000000ULL + #define PSW_MASK_EAMODE 0x0000000100000000ULL + #define PSW_MASK_BAMODE 0x0000000080000000ULL ++#define PSW_MASK_SHORT_ADDR 0x000000007fffffffULL + #define PSW_MASK_64 (PSW_MASK_EAMODE | PSW_MASK_BAMODE) + + /* Low core mapping */ +-- +2.27.0 + diff --git a/SOURCES/kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch b/SOURCES/kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch new file mode 100644 index 0000000..380007c --- /dev/null +++ b/SOURCES/kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch @@ -0,0 +1,82 @@ +From 5b826e7ed09ecf3b2837d147fec6b593f629e450 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Fri, 4 Dec 2020 15:07:59 -0500 +Subject: [PATCH 01/14] ppc/spapr: Add hotremovable flag on DIMM LMBs on + drmem_v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20201204150800.264829-2-gkurz@redhat.com> +Patchwork-id: 100217 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/2] ppc/spapr: Add hotremovable flag on DIMM LMBs on drmem_v2 +Bugzilla: 1901837 +RH-Acked-by: Danilo de Paula +RH-Acked-by: David Gibson +RH-Acked-by: Laurent Vivier + +From: Leonardo Bras + +On reboot, all memory that was previously added using object_add and +device_add is placed in this DIMM area. + +The new SPAPR_LMB_FLAGS_HOTREMOVABLE flag helps Linux to put this memory in +the correct memory zone, so no unmovable allocations are made there, +allowing the object to be easily hot-removed by device_del and +object_del. + +This new flag was accepted in Power Architecture documentation. 
+ +Signed-off-by: Leonardo Bras +Reviewed-by: Bharata B Rao +Message-Id: <20200511200201.58537-1-leobras.c@gmail.com> +[dwg: Fixed syntax error spotted by Cédric Le Goater] +Signed-off-by: David Gibson +(cherry picked from commit 0911a60c76b8598f1863c6951b2b690059465153) +Signed-off-by: Greg Kurz + +Conflicts: + hw/ppc/pnv.c + +The changes in this file clearly don't belong to this +patch. Same goes for the changes in target/ppc/cpu.h and +target/ppc/excp_helper.c. Something went wrong when the +patch was applied. Anyway, downstream doesn't especially +care for pnv, so just drop the changes. + +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 3 ++- + include/hw/ppc/spapr.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a330f038b95..c74079702d0 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -690,7 +690,8 @@ static int spapr_populate_drmem_v2(SpaprMachineState *spapr, void *fdt, + g_assert(drc); + elem = spapr_get_drconf_cell(size / lmb_size, addr, + spapr_drc_index(drc), node, +- SPAPR_LMB_FLAGS_ASSIGNED); ++ (SPAPR_LMB_FLAGS_ASSIGNED | ++ SPAPR_LMB_FLAGS_HOTREMOVABLE)); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + cur_addr = addr + size; +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index aa89cc4a95c..e047dabf300 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -847,6 +847,7 @@ int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset); + #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 + #define SPAPR_LMB_FLAGS_DRC_INVALID 0x00000020 + #define SPAPR_LMB_FLAGS_RESERVED 0x00000080 ++#define SPAPR_LMB_FLAGS_HOTREMOVABLE 0x00000100 + + void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg); + +-- +2.27.0 + diff --git a/SOURCES/kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch b/SOURCES/kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch new file mode 100644 index 0000000..ee0b19a 
--- /dev/null +++ b/SOURCES/kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch @@ -0,0 +1,67 @@ +From e4065c7739c8ea3f6f88898295ed899a1059806e Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Fri, 4 Dec 2020 15:08:00 -0500 +Subject: [PATCH 02/14] ppc/spapr: re-assert IRQs during event-scan if there + are pending + +RH-Author: Greg Kurz +Message-id: <20201204150800.264829-3-gkurz@redhat.com> +Patchwork-id: 100216 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/2] ppc/spapr: re-assert IRQs during event-scan if there are pending +Bugzilla: 1901837 +RH-Acked-by: Danilo de Paula +RH-Acked-by: David Gibson +RH-Acked-by: Laurent Vivier + +From: Laurent Vivier + +If we hotplug a CPU during the first second of the kernel boot, +the IRQ can be sent to the kernel while the RTAS event handler +is not installed. The event is queued, but the kernel doesn't +collect it and ignores the new CPU. + +As the code relies on edge-triggered IRQ, we can re-assert it +during the event-scan RTAS call if there are still pending +events (as it is already done in check-exception). + +Signed-off-by: Laurent Vivier +Message-Id: <20201015210318.117386-1-lvivier@redhat.com> +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +(cherry picked from commit dff669d6a15fb92b063cb5aa691b4bb498727404) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr_events.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c +index e355e000d07..15b92b63adb 100644 +--- a/hw/ppc/spapr_events.c ++++ b/hw/ppc/spapr_events.c +@@ -692,10 +692,22 @@ static void event_scan(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong args, + uint32_t nret, target_ulong rets) + { ++ int i; + if (nargs != 4 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } ++ ++ for (i = 0; i < EVENT_CLASS_MAX; i++) { ++ if (rtas_event_log_contains(EVENT_CLASS_MASK(i))) { ++ const SpaprEventSource *source = ++ spapr_event_sources_get_source(spapr->event_sources, i); ++ ++ g_assert(source->enabled); ++ qemu_irq_pulse(spapr_qirq(spapr, source->irq)); ++ } ++ } ++ + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch b/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch new file mode 100644 index 0000000..bf296d8 --- /dev/null +++ b/SOURCES/kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch @@ -0,0 +1,237 @@ +From 34f664093db2a6275fcddd768684c7319cfc01b4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:06 -0500 +Subject: [PATCH 05/14] qapi: enable use of g_autoptr with QAPI types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-2-marcandre.lureau@redhat.com> +Patchwork-id: 100472 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 01/10] qapi: enable use of g_autoptr with QAPI types +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Daniel P. 
Berrangé + +Currently QAPI generates a type and function for free'ing it: + + typedef struct QCryptoBlockCreateOptions QCryptoBlockCreateOptions; + void qapi_free_QCryptoBlockCreateOptions(QCryptoBlockCreateOptions *obj); + +This is used in the traditional manner: + + QCryptoBlockCreateOptions *opts = NULL; + + opts = g_new0(QCryptoBlockCreateOptions, 1); + + ....do stuff with opts... + + qapi_free_QCryptoBlockCreateOptions(opts); + +Since bumping the min glib to 2.48, QEMU has incrementally adopted the +use of g_auto/g_autoptr. This allows the compiler to run a function to +free a variable when it goes out of scope, the benefit being the +compiler can guarantee it is freed in all possible code paths. + +This benefit is applicable to QAPI types too, and given the seriously +long method names for some qapi_free_XXXX() functions, is much less +typing. This change thus makes the code generator emit: + + G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlockCreateOptions, + qapi_free_QCryptoBlockCreateOptions) + +The above code example now becomes + + g_autoptr(QCryptoBlockCreateOptions) opts = NULL; + + opts = g_new0(QCryptoBlockCreateOptions, 1); + + ....do stuff with opts... + +Note, if the local pointer needs to live beyond the scope holding the +variable, then g_steal_pointer can be used. This is useful to return the +pointer to the caller in the success codepath, while letting it be freed +in all error codepaths. + + return g_steal_pointer(&opts); + +The crypto/block.h header needs updating to avoid symbol clash now that +the g_autoptr support is a standard QAPI feature. + +Signed-off-by: Daniel P. Berrangé +Message-Id: <20200723153845.2934357-1-berrange@redhat.com> +Reviewed-by: Markus Armbruster +Reviewed-by: Eric Blake +Signed-off-by: Markus Armbruster + +(cherry picked from commit 221db5daf6b3666f1c8e4ca06ae45892e99a112f) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/devel/qapi-code-gen.txt | 2 ++ + scripts/qapi/types.py | 1 + + tests/test-qobject-input-visitor.c | 23 +++++++---------------- + 3 files changed, 10 insertions(+), 16 deletions(-) + +diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt +index 45c93a43cc3..ca59c695fac 100644 +--- a/docs/devel/qapi-code-gen.txt ++++ b/docs/devel/qapi-code-gen.txt +@@ -1278,6 +1278,7 @@ Example: + }; + + void qapi_free_UserDefOne(UserDefOne *obj); ++ G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOne, qapi_free_UserDefOne) + + struct UserDefOneList { + UserDefOneList *next; +@@ -1285,6 +1286,7 @@ Example: + }; + + void qapi_free_UserDefOneList(UserDefOneList *obj); ++ G_DEFINE_AUTOPTR_CLEANUP_FUNC(UserDefOneList, qapi_free_UserDefOneList) + + struct q_obj_my_command_arg { + UserDefOneList *arg1; +diff --git a/scripts/qapi/types.py b/scripts/qapi/types.py +index d8751daa049..c3be141dc90 100644 +--- a/scripts/qapi/types.py ++++ b/scripts/qapi/types.py +@@ -213,6 +213,7 @@ def gen_type_cleanup_decl(name): + ret = mcgen(''' + + void qapi_free_%(c_name)s(%(c_name)s *obj); ++G_DEFINE_AUTOPTR_CLEANUP_FUNC(%(c_name)s, qapi_free_%(c_name)s) + ''', + c_name=c_name(name)) + return ret +diff --git a/tests/test-qobject-input-visitor.c b/tests/test-qobject-input-visitor.c +index 6bacabf0632..e41b91a2a6f 100644 +--- a/tests/test-qobject-input-visitor.c ++++ b/tests/test-qobject-input-visitor.c +@@ -417,7 +417,7 @@ static void test_visitor_in_struct(TestInputVisitorData *data, + static void test_visitor_in_struct_nested(TestInputVisitorData *data, + const void *unused) + { +- UserDefTwo *udp = NULL; ++ g_autoptr(UserDefTwo) udp = NULL; + Visitor *v; + + v = visitor_input_test_init(data, "{ 'string0': 'string0', " +@@ -433,8 +433,6 @@ static void test_visitor_in_struct_nested(TestInputVisitorData *data, + g_assert_cmpstr(udp->dict1->dict2->userdef->string, ==, "string"); + g_assert_cmpstr(udp->dict1->dict2->string, ==, "string2"); + g_assert(udp->dict1->has_dict3 == 
false); +- +- qapi_free_UserDefTwo(udp); + } + + static void test_visitor_in_list(TestInputVisitorData *data, +@@ -546,7 +544,7 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, + const void *unused) + { + Visitor *v; +- UserDefFlatUnion *tmp; ++ g_autoptr(UserDefFlatUnion) tmp = NULL; + UserDefUnionBase *base; + + v = visitor_input_test_init(data, +@@ -563,8 +561,6 @@ static void test_visitor_in_union_flat(TestInputVisitorData *data, + + base = qapi_UserDefFlatUnion_base(tmp); + g_assert(&base->enum1 == &tmp->enum1); +- +- qapi_free_UserDefFlatUnion(tmp); + } + + static void test_visitor_in_alternate(TestInputVisitorData *data, +@@ -690,7 +686,7 @@ static void test_list_union_integer_helper(TestInputVisitorData *data, + const void *unused, + UserDefListUnionKind kind) + { +- UserDefListUnion *cvalue = NULL; ++ g_autoptr(UserDefListUnion) cvalue = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); + GString *gstr_union = g_string_new(""); +@@ -782,7 +778,6 @@ static void test_list_union_integer_helper(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + static void test_visitor_in_list_union_int(TestInputVisitorData *data, +@@ -851,7 +846,7 @@ static void test_visitor_in_list_union_uint64(TestInputVisitorData *data, + static void test_visitor_in_list_union_bool(TestInputVisitorData *data, + const void *unused) + { +- UserDefListUnion *cvalue = NULL; ++ g_autoptr(UserDefListUnion) cvalue = NULL; + boolList *elem = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); +@@ -879,13 +874,12 @@ static void test_visitor_in_list_union_bool(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + static void test_visitor_in_list_union_string(TestInputVisitorData *data, + const void *unused) + { +- UserDefListUnion *cvalue = NULL; ++ 
g_autoptr(UserDefListUnion) cvalue = NULL; + strList *elem = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); +@@ -914,7 +908,6 @@ static void test_visitor_in_list_union_string(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + #define DOUBLE_STR_MAX 16 +@@ -922,7 +915,7 @@ static void test_visitor_in_list_union_string(TestInputVisitorData *data, + static void test_visitor_in_list_union_number(TestInputVisitorData *data, + const void *unused) + { +- UserDefListUnion *cvalue = NULL; ++ g_autoptr(UserDefListUnion) cvalue = NULL; + numberList *elem = NULL; + Visitor *v; + GString *gstr_list = g_string_new(""); +@@ -957,7 +950,6 @@ static void test_visitor_in_list_union_number(TestInputVisitorData *data, + + g_string_free(gstr_union, true); + g_string_free(gstr_list, true); +- qapi_free_UserDefListUnion(cvalue); + } + + static void input_visitor_test_add(const char *testpath, +@@ -1253,7 +1245,7 @@ static void test_visitor_in_fail_alternate(TestInputVisitorData *data, + static void do_test_visitor_in_qmp_introspect(TestInputVisitorData *data, + const QLitObject *qlit) + { +- SchemaInfoList *schema = NULL; ++ g_autoptr(SchemaInfoList) schema = NULL; + QObject *obj = qobject_from_qlit(qlit); + Visitor *v; + +@@ -1262,7 +1254,6 @@ static void do_test_visitor_in_qmp_introspect(TestInputVisitorData *data, + visit_type_SchemaInfoList(v, NULL, &schema, &error_abort); + g_assert(schema); + +- qapi_free_SchemaInfoList(schema); + qobject_unref(obj); + visit_free(v); + } +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch b/SOURCES/kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch new file mode 100644 index 0000000..3b533a5 --- /dev/null +++ b/SOURCES/kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch @@ -0,0 +1,73 @@ +From c5f90436555d7ab2c1c28bf1cfdb5f5f8ca97816 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 24 Dec 2020 12:53:04 -0500 +Subject: [PATCH 4/5] qga: Use qemu_get_host_name() instead of + g_get_host_name() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201224125304.62697-4-marcandre.lureau@redhat.com> +Patchwork-id: 100500 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/3] qga: Use qemu_get_host_name() instead of g_get_host_name() +Bugzilla: 1910326 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Philippe Mathieu-Daudé + +From: Michal Privoznik + +Problem with g_get_host_name() is that on the first call it saves +the hostname into a global variable and from then on, every +subsequent call returns the saved hostname. Even if the hostname +changes. This doesn't play nicely with guest agent, because if +the hostname is acquired before the guest is set up (e.g. on the +first boot, or before DHCP) we will report old, invalid hostname. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1845127 + +Signed-off-by: Michal Privoznik +Reviewed-by: Daniel P. Berrangé +Cc: qemu-stable@nongnu.org +Signed-off-by: Michael Roth + +(cherry picked from commit 0d3a8f32b1e0eca279da1b0cc793efc7250c3daf) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/commands.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/qga/commands.c b/qga/commands.c +index 43c323ceada..93bed292d08 100644 +--- a/qga/commands.c ++++ b/qga/commands.c +@@ -502,11 +502,20 @@ int ga_parse_whence(GuestFileWhence *whence, Error **errp) + GuestHostName *qmp_guest_get_host_name(Error **errp) + { + GuestHostName *result = NULL; +- gchar const *hostname = g_get_host_name(); +- if (hostname != NULL) { +- result = g_new0(GuestHostName, 1); +- result->host_name = g_strdup(hostname); ++ g_autofree char *hostname = qemu_get_host_name(errp); ++ ++ /* ++ * We want to avoid using g_get_host_name() because that ++ * caches the result and we wouldn't reflect changes in the ++ * host name. ++ */ ++ ++ if (!hostname) { ++ hostname = g_strdup("localhost"); + } ++ ++ result = g_new0(GuestHostName, 1); ++ result->host_name = g_steal_pointer(&hostname); + return result; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-add-command-guest-get-disks.patch b/SOURCES/kvm-qga-add-command-guest-get-disks.patch new file mode 100644 index 0000000..360301d --- /dev/null +++ b/SOURCES/kvm-qga-add-command-guest-get-disks.patch @@ -0,0 +1,115 @@ +From 58688d868656e77f67ea915544b0bb3bb60f33d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:11 -0500 +Subject: [PATCH 10/14] qga: add command guest-get-disks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-7-marcandre.lureau@redhat.com> +Patchwork-id: 100475 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 06/10] qga: add command guest-get-disks +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Tomáš Golembiovský + +Add API and stubs for new guest-get-disks command. 
+ +The command guest-get-fsinfo can be used to list information about disks +and partitions but it is limited only to mounted disks with filesystem. +This new command should allow listing information about disks of the VM +regardless of whether they are mounted or not. This can be useful for +management applications for mapping virtualized devices or pass-through +devices to device names in the guest OS. + +Signed-off-by: Tomáš Golembiovský +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Marc-André Lureau +Signed-off-by: Michael Roth + +(cherry-picked from commit c27ea3f9ef7c7f29e55bde91879f8514abce9c38) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-posix.c | 6 ++++++ + qga/commands-win32.c | 6 ++++++ + qga/qapi-schema.json | 31 +++++++++++++++++++++++++++++++ + 3 files changed, 43 insertions(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index c86c87ed522..5095104afc0 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -3039,3 +3039,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} +diff --git a/qga/commands-win32.c b/qga/commands-win32.c +index 55ba5b263af..be63fa2b208 100644 +--- a/qga/commands-win32.c ++++ b/qga/commands-win32.c +@@ -2234,3 +2234,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} +diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json +index fb4605cc19c..22df375c92f 100644 +--- a/qga/qapi-schema.json ++++ b/qga/qapi-schema.json +@@ -852,6 +852,37 @@ + 'bus': 'int', 'target': 'int', 'unit': 'int', + '*serial': 'str', '*dev': 'str'} } + ++## ++# @GuestDiskInfo: ++# ++# @name: device node (Linux) or device UNC (Windows) ++# @partition: whether this is a partition or disk ++# 
@dependents: list of dependent devices; e.g. for LVs of the LVM this will ++# hold the list of PVs, for LUKS encrypted volume this will ++# contain the disk where the volume is placed. (Linux) ++# @address: disk address information (only for non-virtual devices) ++# @alias: optional alias assigned to the disk, on Linux this is a name assigned ++# by device mapper ++# ++# Since 5.2 ++## ++{ 'struct': 'GuestDiskInfo', ++ 'data': {'name': 'str', 'partition': 'bool', 'dependents': ['str'], ++ '*address': 'GuestDiskAddress', '*alias': 'str'} } ++ ++## ++# @guest-get-disks: ++# ++# Returns: The list of disks in the guest. For Windows these are only the ++# physical disks. On Linux these are all root block devices of ++# non-zero size including e.g. removable devices, loop devices, ++# NBD, etc. ++# ++# Since: 5.2 ++## ++{ 'command': 'guest-get-disks', ++ 'returns': ['GuestDiskInfo'] } ++ + ## + # @GuestFilesystemInfo: + # +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch new file mode 100644 index 0000000..939a212 --- /dev/null +++ b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch @@ -0,0 +1,427 @@ +From 086957b970a8f4165249589e2bc0cc08d1800db3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:12 -0500 +Subject: [PATCH 11/14] qga: add implementation of guest-get-disks for Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-8-marcandre.lureau@redhat.com> +Patchwork-id: 100478 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 07/10] qga: add implementation of guest-get-disks for Linux +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Tomáš Golembiovský + +The command lists all disks (real and virtual) as well 
as disk +partitions. For each disk the list of dependent disks is also listed and +/dev path is used as a handle so it can be matched with "name" field of +other returned disk entries. For disk partitions the "dependents" list +is populated with the parent device for easier tracking of +hierarchy. + +Example output: +{ + "return": [ + ... + { + "name": "/dev/dm-0", + "partition": false, + "dependents": [ + "/dev/sda2" + ], + "alias": "luks-7062202e-5b9b-433e-81e8-6628c40da9f7" + }, + { + "name": "/dev/sda2", + "partition": true, + "dependents": [ + "/dev/sda" + ] + }, + { + "name": "/dev/sda", + "partition": false, + "address": { + "serial": "SAMSUNG_MZ7LN512HCHP-000L1_S1ZKNXAG822493", + "bus-type": "sata", + ... + "dev": "/dev/sda", + "target": 0 + }, + "dependents": [] + }, + ... + ] +} + +Signed-off-by: Tomáš Golembiovský +Reviewed-by: Marc-André Lureau +*add missing stub for !defined(CONFIG_FSFREEZE) +*remove unused deps_dir variable +Signed-off-by: Michael Roth +(cherry picked from commit fed3956429d560a06fc2d2fcf1a01efb58659f87) +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/commands-posix.c | 303 +++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 292 insertions(+), 11 deletions(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 5095104afc0..96f5ddafd3a 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1152,13 +1152,27 @@ static void build_guest_fsinfo_for_virtual_device(char const *syspath, + closedir(dir); + } + ++static bool is_disk_virtual(const char *devpath, Error **errp) ++{ ++ g_autofree char *syspath = realpath(devpath, NULL); ++ ++ if (!syspath) { ++ error_setg_errno(errp, errno, "realpath(\"%s\")", devpath); ++ return false; ++ } ++ return strstr(syspath, "/devices/virtual/block/") != NULL; ++} ++ + /* Dispatch to functions for virtual/real device */ + static void build_guest_fsinfo_for_device(char const *devpath, + GuestFilesystemInfo *fs, + Error **errp) + { +- char *syspath = realpath(devpath, NULL); ++ ERRP_GUARD(); ++ g_autofree char *syspath = NULL; ++ bool is_virtual = false; + ++ syspath = realpath(devpath, NULL); + if (!syspath) { + error_setg_errno(errp, errno, "realpath(\"%s\")", devpath); + return; +@@ -1169,16 +1183,281 @@ static void build_guest_fsinfo_for_device(char const *devpath, + } + + g_debug(" parse sysfs path '%s'", syspath); +- +- if (strstr(syspath, "/devices/virtual/block/")) { ++ is_virtual = is_disk_virtual(syspath, errp); ++ if (*errp != NULL) { ++ return; ++ } ++ if (is_virtual) { + build_guest_fsinfo_for_virtual_device(syspath, fs, errp); + } else { + build_guest_fsinfo_for_real_device(syspath, fs, errp); + } ++} ++ ++#ifdef CONFIG_LIBUDEV ++ ++/* ++ * Wrapper around build_guest_fsinfo_for_device() for getting just ++ * the disk address. 
++ */ ++static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp) ++{ ++ g_autoptr(GuestFilesystemInfo) fs = NULL; + +- free(syspath); ++ fs = g_new0(GuestFilesystemInfo, 1); ++ build_guest_fsinfo_for_device(syspath, fs, errp); ++ if (fs->disk != NULL) { ++ return g_steal_pointer(&fs->disk->value); ++ } ++ return NULL; + } + ++static char *get_alias_for_syspath(const char *syspath) ++{ ++ struct udev *udev = NULL; ++ struct udev_device *udevice = NULL; ++ char *ret = NULL; ++ ++ udev = udev_new(); ++ if (udev == NULL) { ++ g_debug("failed to query udev"); ++ goto out; ++ } ++ udevice = udev_device_new_from_syspath(udev, syspath); ++ if (udevice == NULL) { ++ g_debug("failed to query udev for path: %s", syspath); ++ goto out; ++ } else { ++ const char *alias = udev_device_get_property_value( ++ udevice, "DM_NAME"); ++ /* ++ * NULL means there was an error and empty string means there is no ++ * alias. In case of no alias we return NULL instead of empty string. ++ */ ++ if (alias == NULL) { ++ g_debug("failed to query udev for device alias for: %s", ++ syspath); ++ } else if (*alias != 0) { ++ ret = g_strdup(alias); ++ } ++ } ++ ++out: ++ udev_unref(udev); ++ udev_device_unref(udevice); ++ return ret; ++} ++ ++static char *get_device_for_syspath(const char *syspath) ++{ ++ struct udev *udev = NULL; ++ struct udev_device *udevice = NULL; ++ char *ret = NULL; ++ ++ udev = udev_new(); ++ if (udev == NULL) { ++ g_debug("failed to query udev"); ++ goto out; ++ } ++ udevice = udev_device_new_from_syspath(udev, syspath); ++ if (udevice == NULL) { ++ g_debug("failed to query udev for path: %s", syspath); ++ goto out; ++ } else { ++ ret = g_strdup(udev_device_get_devnode(udevice)); ++ } ++ ++out: ++ udev_unref(udev); ++ udev_device_unref(udevice); ++ return ret; ++} ++ ++static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) ++{ ++ g_autofree char *deps_dir = NULL; ++ const gchar *dep; ++ GDir *dp_deps = NULL; ++ ++ /* List dependent disks */ 
++ deps_dir = g_strdup_printf("%s/slaves", disk_dir); ++ g_debug(" listing entries in: %s", deps_dir); ++ dp_deps = g_dir_open(deps_dir, 0, NULL); ++ if (dp_deps == NULL) { ++ g_debug("failed to list entries in %s", deps_dir); ++ return; ++ } ++ while ((dep = g_dir_read_name(dp_deps)) != NULL) { ++ g_autofree char *dep_dir = NULL; ++ strList *dep_item = NULL; ++ char *dev_name; ++ ++ /* Add dependent disks */ ++ dep_dir = g_strdup_printf("%s/%s", deps_dir, dep); ++ dev_name = get_device_for_syspath(dep_dir); ++ if (dev_name != NULL) { ++ g_debug(" adding dependent device: %s", dev_name); ++ dep_item = g_new0(strList, 1); ++ dep_item->value = dev_name; ++ dep_item->next = disk->dependents; ++ disk->dependents = dep_item; ++ } ++ } ++ g_dir_close(dp_deps); ++} ++ ++/* ++ * Detect partitions subdirectory, name is "" or ++ * "p" ++ * ++ * @disk_name -- last component of /sys path (e.g. sda) ++ * @disk_dir -- sys path of the disk (e.g. /sys/block/sda) ++ * @disk_dev -- device node of the disk (e.g. 
/dev/sda) ++ */ ++static GuestDiskInfoList *get_disk_partitions( ++ GuestDiskInfoList *list, ++ const char *disk_name, const char *disk_dir, ++ const char *disk_dev) ++{ ++ GuestDiskInfoList *item, *ret = list; ++ struct dirent *de_disk; ++ DIR *dp_disk = NULL; ++ size_t len = strlen(disk_name); ++ ++ dp_disk = opendir(disk_dir); ++ while ((de_disk = readdir(dp_disk)) != NULL) { ++ g_autofree char *partition_dir = NULL; ++ char *dev_name; ++ GuestDiskInfo *partition; ++ ++ if (!(de_disk->d_type & DT_DIR)) { ++ continue; ++ } ++ ++ if (!(strncmp(disk_name, de_disk->d_name, len) == 0 && ++ ((*(de_disk->d_name + len) == 'p' && ++ isdigit(*(de_disk->d_name + len + 1))) || ++ isdigit(*(de_disk->d_name + len))))) { ++ continue; ++ } ++ ++ partition_dir = g_strdup_printf("%s/%s", ++ disk_dir, de_disk->d_name); ++ dev_name = get_device_for_syspath(partition_dir); ++ if (dev_name == NULL) { ++ g_debug("Failed to get device name for syspath: %s", ++ disk_dir); ++ continue; ++ } ++ partition = g_new0(GuestDiskInfo, 1); ++ partition->name = dev_name; ++ partition->partition = true; ++ /* Add parent disk as dependent for easier tracking of hierarchy */ ++ partition->dependents = g_new0(strList, 1); ++ partition->dependents->value = g_strdup(disk_dev); ++ ++ item = g_new0(GuestDiskInfoList, 1); ++ item->value = partition; ++ item->next = ret; ++ ret = item; ++ ++ } ++ closedir(dp_disk); ++ ++ return ret; ++} ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ GuestDiskInfoList *item, *ret = NULL; ++ GuestDiskInfo *disk; ++ DIR *dp = NULL; ++ struct dirent *de = NULL; ++ ++ g_debug("listing /sys/block directory"); ++ dp = opendir("/sys/block"); ++ if (dp == NULL) { ++ error_setg_errno(errp, errno, "Can't open directory \"/sys/block\""); ++ return NULL; ++ } ++ while ((de = readdir(dp)) != NULL) { ++ g_autofree char *disk_dir = NULL, *line = NULL, ++ *size_path = NULL; ++ char *dev_name; ++ Error *local_err = NULL; ++ if (de->d_type != DT_LNK) { ++ g_debug(" skipping 
entry: %s", de->d_name); ++ continue; ++ } ++ ++ /* Check size and skip zero-sized disks */ ++ g_debug(" checking disk size"); ++ size_path = g_strdup_printf("/sys/block/%s/size", de->d_name); ++ if (!g_file_get_contents(size_path, &line, NULL, NULL)) { ++ g_debug(" failed to read disk size"); ++ continue; ++ } ++ if (g_strcmp0(line, "0\n") == 0) { ++ g_debug(" skipping zero-sized disk"); ++ continue; ++ } ++ ++ g_debug(" adding %s", de->d_name); ++ disk_dir = g_strdup_printf("/sys/block/%s", de->d_name); ++ dev_name = get_device_for_syspath(disk_dir); ++ if (dev_name == NULL) { ++ g_debug("Failed to get device name for syspath: %s", ++ disk_dir); ++ continue; ++ } ++ disk = g_new0(GuestDiskInfo, 1); ++ disk->name = dev_name; ++ disk->partition = false; ++ disk->alias = get_alias_for_syspath(disk_dir); ++ disk->has_alias = (disk->alias != NULL); ++ item = g_new0(GuestDiskInfoList, 1); ++ item->value = disk; ++ item->next = ret; ++ ret = item; ++ ++ /* Get address for non-virtual devices */ ++ bool is_virtual = is_disk_virtual(disk_dir, &local_err); ++ if (local_err != NULL) { ++ g_debug(" failed to check disk path, ignoring error: %s", ++ error_get_pretty(local_err)); ++ error_free(local_err); ++ local_err = NULL; ++ /* Don't try to get the address */ ++ is_virtual = true; ++ } ++ if (!is_virtual) { ++ disk->address = get_disk_address(disk_dir, &local_err); ++ if (local_err != NULL) { ++ g_debug(" failed to get device info, ignoring error: %s", ++ error_get_pretty(local_err)); ++ error_free(local_err); ++ local_err = NULL; ++ } else if (disk->address != NULL) { ++ disk->has_address = true; ++ } ++ } ++ ++ get_disk_deps(disk_dir, disk); ++ ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name); ++ } ++ return ret; ++} ++ ++#else ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} ++ ++#endif ++ + /* Return a list of the disk device(s)' info which @mount lies on */ + static 
GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount, + Error **errp) +@@ -2770,6 +3049,13 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) + + return 0; + } ++ ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} ++ + #endif /* CONFIG_FSFREEZE */ + + #if !defined(CONFIG_FSTRIM) +@@ -2806,7 +3092,8 @@ GList *ga_command_blacklist_init(GList *blacklist) + const char *list[] = { + "guest-get-fsinfo", "guest-fsfreeze-status", + "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list", +- "guest-fsfreeze-thaw", "guest-get-fsinfo", NULL}; ++ "guest-fsfreeze-thaw", "guest-get-fsinfo", ++ "guest-get-disks", NULL}; + char **p = (char **)list; + + while (*p) { +@@ -3039,9 +3326,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } +- +-GuestDiskInfoList *qmp_guest_get_disks(Error **errp) +-{ +- error_setg(errp, QERR_UNSUPPORTED); +- return NULL; +-} +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch new file mode 100644 index 0000000..f82d95d --- /dev/null +++ b/SOURCES/kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch @@ -0,0 +1,181 @@ +From 925163bf8498e26c19742dbd34b6b324e49c07b6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:13 -0500 +Subject: [PATCH 12/14] qga: add implementation of guest-get-disks for Windows +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-9-marcandre.lureau@redhat.com> +Patchwork-id: 100479 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 08/10] qga: add implementation of guest-get-disks for Windows +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Tomáš Golembiovský + +The command lists all the physical 
disk drives. Unlike for Linux +partitions and virtual volumes are not listed. + +Example output: + +{ + "return": [ + { + "name": "\\\\.\\PhysicalDrive0", + "partition": false, + "address": { + "serial": "QM00001", + "bus-type": "sata", + ... + }, + "dependents": [] + } + ] +} + +Signed-off-by: Tomáš Golembiovský +Signed-off-by: Michael Roth + +(cherry picked from commit c67d2efd9d1771fd886e3b58771adaa62897f3d9) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-win32.c | 107 ++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 101 insertions(+), 6 deletions(-) + +diff --git a/qga/commands-win32.c b/qga/commands-win32.c +index be63fa2b208..a07725e874b 100644 +--- a/qga/commands-win32.c ++++ b/qga/commands-win32.c +@@ -960,6 +960,101 @@ out: + return list; + } + ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ ERRP_GUARD(); ++ GuestDiskInfoList *new = NULL, *ret = NULL; ++ HDEVINFO dev_info; ++ SP_DEVICE_INTERFACE_DATA dev_iface_data; ++ int i; ++ ++ dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_DISK, 0, 0, ++ DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); ++ if (dev_info == INVALID_HANDLE_VALUE) { ++ error_setg_win32(errp, GetLastError(), "failed to get device tree"); ++ return NULL; ++ } ++ ++ g_debug("enumerating devices"); ++ dev_iface_data.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA); ++ for (i = 0; ++ SetupDiEnumDeviceInterfaces(dev_info, NULL, &GUID_DEVINTERFACE_DISK, ++ i, &dev_iface_data); ++ i++) { ++ GuestDiskAddress *address = NULL; ++ GuestDiskInfo *disk = NULL; ++ Error *local_err = NULL; ++ g_autofree PSP_DEVICE_INTERFACE_DETAIL_DATA ++ pdev_iface_detail_data = NULL; ++ STORAGE_DEVICE_NUMBER sdn; ++ HANDLE dev_file; ++ DWORD size = 0; ++ BOOL result; ++ int attempt; ++ ++ g_debug(" getting device path"); ++ for (attempt = 0, result = FALSE; attempt < 2 && !result; attempt++) { ++ result = SetupDiGetDeviceInterfaceDetail(dev_info, ++ &dev_iface_data, pdev_iface_detail_data, size, &size, NULL); ++ 
if (result) { ++ break; ++ } ++ if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { ++ pdev_iface_detail_data = g_realloc(pdev_iface_detail_data, ++ size); ++ pdev_iface_detail_data->cbSize = ++ sizeof(*pdev_iface_detail_data); ++ } else { ++ g_debug("failed to get device interface details"); ++ break; ++ } ++ } ++ if (!result) { ++ g_debug("skipping device"); ++ continue; ++ } ++ ++ g_debug(" device: %s", pdev_iface_detail_data->DevicePath); ++ dev_file = CreateFile(pdev_iface_detail_data->DevicePath, 0, ++ FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL); ++ if (!DeviceIoControl(dev_file, IOCTL_STORAGE_GET_DEVICE_NUMBER, ++ NULL, 0, &sdn, sizeof(sdn), &size, NULL)) { ++ CloseHandle(dev_file); ++ debug_error("failed to get storage device number"); ++ continue; ++ } ++ CloseHandle(dev_file); ++ ++ disk = g_new0(GuestDiskInfo, 1); ++ disk->name = g_strdup_printf("\\\\.\\PhysicalDrive%lu", ++ sdn.DeviceNumber); ++ ++ g_debug(" number: %lu", sdn.DeviceNumber); ++ address = g_malloc0(sizeof(GuestDiskAddress)); ++ address->has_dev = true; ++ address->dev = g_strdup(disk->name); ++ get_single_disk_info(sdn.DeviceNumber, address, &local_err); ++ if (local_err) { ++ g_debug("failed to get disk info: %s", ++ error_get_pretty(local_err)); ++ error_free(local_err); ++ qapi_free_GuestDiskAddress(address); ++ address = NULL; ++ } else { ++ disk->address = address; ++ disk->has_address = true; ++ } ++ ++ new = g_malloc0(sizeof(GuestDiskInfoList)); ++ new->value = disk; ++ new->next = ret; ++ ret = new; ++ } ++ ++ SetupDiDestroyDeviceInfoList(dev_info); ++ return ret; ++} ++ + #else + + static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) +@@ -967,6 +1062,12 @@ static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp) + return NULL; + } + ++GuestDiskInfoList *qmp_guest_get_disks(Error **errp) ++{ ++ error_setg(errp, QERR_UNSUPPORTED); ++ return NULL; ++} ++ + #endif /* CONFIG_QGA_NTDDSCSI */ + + static GuestFilesystemInfo 
*build_guest_fsinfo(char *guid, Error **errp) +@@ -2234,9 +2335,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp) + + return info; + } +- +-GuestDiskInfoList *qmp_guest_get_disks(Error **errp) +-{ +- error_setg(errp, QERR_UNSUPPORTED); +- return NULL; +-} +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch b/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch new file mode 100644 index 0000000..7db6e1f --- /dev/null +++ b/SOURCES/kvm-qga-fix-assert-regression-on-guest-shutdown.patch @@ -0,0 +1,61 @@ +From 93b37bad75d14ed4b9e96cc3587d8ae16cb96ba3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 2 Oct 2020 17:46:08 -0400 +Subject: [PATCH 01/18] qga: fix assert regression on guest-shutdown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201002174608.943992-2-marcandre.lureau@redhat.com> +Patchwork-id: 98534 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] qga: fix assert regression on guest-shutdown +Bugzilla: 1884531 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: Marc-André Lureau + +Since commit 781f2b3d1e ("qga: process_event() simplification"), +send_response() is called unconditionally, but will assert when "rsp" is +NULL. This may happen with QCO_NO_SUCCESS_RESP commands, such as +"guest-shutdown". + +Fixes: 781f2b3d1e5ef389b44016a897fd55e7a780bf35 +Cc: Michael Roth +Reported-by: Christian Ehrhardt +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Christian Ehrhardt +Tested-by: Christian Ehrhardt +Cc: qemu-stable@nongnu.org +Signed-off-by: Michael Roth + +(cherry picked from commit 844bd70b5652f30bbace89499f513e3fbbb6457a) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/qga/main.c b/qga/main.c +index c35c2a21209..12fa463f4cd 100644 +--- a/qga/main.c ++++ b/qga/main.c +@@ -529,7 +529,11 @@ static int send_response(GAState *s, const QDict *rsp) + QString *payload_qstr, *response_qstr; + GIOStatus status; + +- g_assert(rsp && s->channel); ++ g_assert(s->channel); ++ ++ if (!rsp) { ++ return 0; ++ } + + payload_qstr = qobject_to_json(QOBJECT(rsp)); + if (!payload_qstr) { +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch b/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch new file mode 100644 index 0000000..6ffc5bd --- /dev/null +++ b/SOURCES/kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch @@ -0,0 +1,54 @@ +From c9b1eb9d6c0da9098d5410d90d290d6fca6ea7dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:14 -0500 +Subject: [PATCH 13/14] qga: fix missing closedir() in qmp_guest_get_disks() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-10-marcandre.lureau@redhat.com> +Patchwork-id: 100481 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 09/10] qga: fix missing closedir() in qmp_guest_get_disks() +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Michael Roth + +We opendir("/sys/block") at the beginning of the function, but we never +close it prior to returning. + +Fixes: Coverity CID 1436130 +Fixes: fed3956429d5 ("qga: add implementation of guest-get-disks for Linux") +Reported-by: Peter Maydell +Cc: Marc-André Lureau +Cc: Tomáš Golembiovský +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Michael Roth + +(cherry-picked from commit b1b9ab1c04d560f86d8da3dfca4d8b21de75fee6) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/commands-posix.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 96f5ddafd3a..9a170dee14c 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1445,6 +1445,9 @@ GuestDiskInfoList *qmp_guest_get_disks(Error **errp) + get_disk_deps(disk_dir, disk); + ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name); + } ++ ++ closedir(dp); ++ + return ret; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-rename-Error-parameter-to-more-common-errp.patch b/SOURCES/kvm-qga-rename-Error-parameter-to-more-common-errp.patch new file mode 100644 index 0000000..2528d26 --- /dev/null +++ b/SOURCES/kvm-qga-rename-Error-parameter-to-more-common-errp.patch @@ -0,0 +1,121 @@ +From 457ba062cc1026a88a70ab3cb9a52acd62c5a2a8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 24 Dec 2020 12:53:02 -0500 +Subject: [PATCH 2/5] qga: rename Error ** parameter to more common errp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201224125304.62697-2-marcandre.lureau@redhat.com> +Patchwork-id: 100498 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/3] qga: rename Error ** parameter to more common errp +Bugzilla: 1910326 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Philippe Mathieu-Daudé + +From: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20191205174635.18758-13-vsementsov@virtuozzo.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Markus Armbruster +Signed-off-by: Markus Armbruster + +(cherry picked from commit b90abbac0b95f68a7ebac5545ab77b98f598a9c7) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/commands-posix.c | 2 +- + qga/commands-win32.c | 2 +- + qga/commands.c | 12 ++++++------ + 3 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index c02373cdf7d..29353e90c8f 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -3134,7 +3134,7 @@ static double ga_get_login_time(struct utmpx *user_info) + return seconds + useconds; + } + +-GuestUserList *qmp_guest_get_users(Error **err) ++GuestUserList *qmp_guest_get_users(Error **errp) + { + GHashTable *cache = NULL; + GuestUserList *head = NULL, *cur_item = NULL; +diff --git a/qga/commands-win32.c b/qga/commands-win32.c +index a07725e874b..618ccdfadaa 100644 +--- a/qga/commands-win32.c ++++ b/qga/commands-win32.c +@@ -2047,7 +2047,7 @@ typedef struct _GA_WTSINFOA { + + } GA_WTSINFOA; + +-GuestUserList *qmp_guest_get_users(Error **err) ++GuestUserList *qmp_guest_get_users(Error **errp) + { + #define QGA_NANOSECONDS 10000000 + +diff --git a/qga/commands.c b/qga/commands.c +index 0c7d1385c23..43c323ceada 100644 +--- a/qga/commands.c ++++ b/qga/commands.c +@@ -143,7 +143,7 @@ static GuestExecInfo *guest_exec_info_find(int64_t pid_numeric) + return NULL; + } + +-GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **err) ++GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **errp) + { + GuestExecInfo *gei; + GuestExecStatus *ges; +@@ -152,7 +152,7 @@ GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **err) + + gei = guest_exec_info_find(pid); + if (gei == NULL) { +- error_setg(err, QERR_INVALID_PARAMETER, "pid"); ++ error_setg(errp, QERR_INVALID_PARAMETER, "pid"); + return NULL; + } + +@@ -385,7 +385,7 @@ GuestExec *qmp_guest_exec(const char *path, + bool has_env, strList *env, + bool has_input_data, const char *input_data, + bool has_capture_output, bool capture_output, +- Error **err) ++ Error **errp) + { + GPid pid; + GuestExec *ge = NULL; +@@ -405,7 +405,7 @@ GuestExec *qmp_guest_exec(const char *path, + 
arglist.next = has_arg ? arg : NULL; + + if (has_input_data) { +- input = qbase64_decode(input_data, -1, &ninput, err); ++ input = qbase64_decode(input_data, -1, &ninput, errp); + if (!input) { + return NULL; + } +@@ -424,7 +424,7 @@ GuestExec *qmp_guest_exec(const char *path, + guest_exec_task_setup, NULL, &pid, has_input_data ? &in_fd : NULL, + has_output ? &out_fd : NULL, has_output ? &err_fd : NULL, &gerr); + if (!ret) { +- error_setg(err, QERR_QGA_COMMAND_FAILED, gerr->message); ++ error_setg(errp, QERR_QGA_COMMAND_FAILED, gerr->message); + g_error_free(gerr); + goto done; + } +@@ -499,7 +499,7 @@ int ga_parse_whence(GuestFileWhence *whence, Error **errp) + return -1; + } + +-GuestHostName *qmp_guest_get_host_name(Error **err) ++GuestHostName *qmp_guest_get_host_name(Error **errp) + { + GuestHostName *result = NULL; + gchar const *hostname = g_get_host_name(); +-- +2.27.0 + diff --git a/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch b/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch new file mode 100644 index 0000000..727015e --- /dev/null +++ b/SOURCES/kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch @@ -0,0 +1,113 @@ +From ff881d64d3f29825ab093eb2be183658226ccba3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 16 Dec 2020 16:06:15 -0500 +Subject: [PATCH 14/14] qga: update schema for guest-get-disks 'dependents' + field +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201216160615.324213-11-marcandre.lureau@redhat.com> +Patchwork-id: 100480 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 10/10] qga: update schema for guest-get-disks 'dependents' field +Bugzilla: 1859494 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi + +From: Michael Roth + +The recently-added 'guest-get-disk' command returns a list of +GuestDiskInfo entries, which in 
turn have a 'dependents' field which +lists devices these entries are dependent upon. Thus, 'dependencies' +is a better name for this field. Address this by renaming the field +accordingly. + +Additionally, 'dependents' is specified as non-optional, even though +it's not implemented for w32. This is misleading, since it gives users +the impression that a particular disk might not have dependencies, +when in reality that information is simply not known to the guest +agent. Address this by making 'dependents' an optional field, and only +marking it as in-use when the facilities to obtain this information are +available to the guest agent. + +Cc: Eric Blake +Cc: Tomáš Golembiovský +Cc: Marc-André Lureau +Reviewed-by: Eric Blake +Reviewed-by: Marc-André Lureau +Signed-off-by: Michael Roth + +(cherry-picked from commit a8aa94b5f8427cc2924d8cdd417c8014db1c86c0) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + qga/commands-posix.c | 10 ++++++---- + qga/qapi-schema.json | 8 ++++---- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 9a170dee14c..c02373cdf7d 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1287,6 +1287,7 @@ static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) + g_debug("failed to list entries in %s", deps_dir); + return; + } ++ disk->has_dependencies = true; + while ((dep = g_dir_read_name(dp_deps)) != NULL) { + g_autofree char *dep_dir = NULL; + strList *dep_item = NULL; +@@ -1299,8 +1300,8 @@ static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk) + g_debug(" adding dependent device: %s", dev_name); + dep_item = g_new0(strList, 1); + dep_item->value = dev_name; +- dep_item->next = disk->dependents; +- disk->dependents = dep_item; ++ dep_item->next = disk->dependencies; ++ disk->dependencies = dep_item; + } + } + g_dir_close(dp_deps); +@@ -1353,8 +1354,9 @@ static GuestDiskInfoList *get_disk_partitions( + 
partition->name = dev_name; + partition->partition = true; + /* Add parent disk as dependent for easier tracking of hierarchy */ +- partition->dependents = g_new0(strList, 1); +- partition->dependents->value = g_strdup(disk_dev); ++ partition->dependencies = g_new0(strList, 1); ++ partition->dependencies->value = g_strdup(disk_dev); ++ partition->has_dependencies = true; + + item = g_new0(GuestDiskInfoList, 1); + item->value = partition; +diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json +index 22df375c92f..4222cb92d34 100644 +--- a/qga/qapi-schema.json ++++ b/qga/qapi-schema.json +@@ -857,9 +857,9 @@ + # + # @name: device node (Linux) or device UNC (Windows) + # @partition: whether this is a partition or disk +-# @dependents: list of dependent devices; e.g. for LVs of the LVM this will +-# hold the list of PVs, for LUKS encrypted volume this will +-# contain the disk where the volume is placed. (Linux) ++# @dependencies: list of device dependencies; e.g. for LVs of the LVM this will ++# hold the list of PVs, for LUKS encrypted volume this will ++# contain the disk where the volume is placed. 
(Linux) + # @address: disk address information (only for non-virtual devices) + # @alias: optional alias assigned to the disk, on Linux this is a name assigned + # by device mapper +@@ -867,7 +867,7 @@ + # Since 5.2 + ## + { 'struct': 'GuestDiskInfo', +- 'data': {'name': 'str', 'partition': 'bool', 'dependents': ['str'], ++ 'data': {'name': 'str', 'partition': 'bool', '*dependencies': ['str'], + '*address': 'GuestDiskAddress', '*alias': 'str'} } + + ## +-- +2.27.0 + diff --git a/SOURCES/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch b/SOURCES/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch new file mode 100644 index 0000000..55be349 --- /dev/null +++ b/SOURCES/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch @@ -0,0 +1,72 @@ +From b07219611480dd4a37b2476604a1cec35c812216 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 23 Dec 2020 12:29:24 -0500 +Subject: [PATCH 1/5] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201223122924.341944-1-marcandre.lureau@redhat.com> +Patchwork-id: 100496 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +Bugzilla: 1910267 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Danilo de Paula + +From: Danilo de Paula + +BZ: 1910267 +BRANCH: rhel-8.4.0 +UPSTREAM: RHEL-only +BREW: 33929331 + +When qemu-ga was introduced to RHEL-8, we used the qemu-guest-agent +from RHEL-7 as base. + +In RHEL-7, qemu-guest-agent is built as standalone package. +It's built as "qemu-ga", hence the "qemu-ga" folders. + +For RHEL-8, that should have been renamed to qemu-kvm, but I missed it. +Renaming those folders to /etc/qemu-kvm is a no go today, because +users might have populated the /etc/qemu-ga/fsfreeze-hook.d folder. 
+ +So, in order to make qemu-ga -F work in RHEL-8, a link is being +created in the expected place, pointing to the real one. + +Also, fsfreeze-hook opens up the fsfreeze-hook.d on the same PATH where +it is stored. However, it doesn't follow symlinks. In order to fix this, +I had to change it to make sure it follows the link. + +An option would be to also link the fsfreeze-hook.d folder, but I choose +not to do so as it creates a permanent/visible change in users +environments. The downside is to keep another downstream-only change. + +Signed-off-by: Danilo C. L. de Paula + +[ cherry-picked from commit 020501879841afb788087f0455df79367c0337a0 ] +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + redhat/qemu-kvm.spec.template | 6 ++++++ + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + 2 files changed, 7 insertions(+), 1 deletion(-) + + +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd48451..e9b84ec0284 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed.
+ + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-guest-support-for-diagnose-0x318.patch b/SOURCES/kvm-s390-guest-support-for-diagnose-0x318.patch new file mode 100644 index 0000000..84fc7bc --- /dev/null +++ b/SOURCES/kvm-s390-guest-support-for-diagnose-0x318.patch @@ -0,0 +1,282 @@ +From 7ad1c4aaea6cd202449c05fc0034af6b108def4f Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:14 -0500 +Subject: [PATCH 14/18] s390: guest support for diagnose 0x318 + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-11-thuth@redhat.com> +Patchwork-id: 99507 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 10/12] s390: guest support for diagnose 0x318 +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +DIAGNOSE 0x318 (diag318) is an s390 instruction that allows the storage +of diagnostic information that is collected by the firmware in the case +of hardware/firmware service events. + +QEMU handles the instruction by storing the info in the CPU state. A +subsequent register sync will communicate the data to the hypervisor. + +QEMU handles the migration via a VM State Description. + +This feature depends on the Extended-Length SCCB (els) feature. If +els is not present, then a warning will be printed and the SCLP bit +that allows the Linux kernel to execute the instruction will not be +set. + +Availability of this instruction is determined by byte 134 (aka fac134) +bit 0 of the SCLP Read Info block. This coincidentally expands into the +space used for CPU entries, which means VMs running with the diag318 +capability may not be able to read information regarding all CPUs +unless the guest kernel supports an extended-length SCCB. 
+ +This feature is not supported in protected virtualization mode. + +Signed-off-by: Collin Walling +Acked-by: Janosch Frank +Acked-by: Thomas Huth +Acked-by: David Hildenbrand +Acked-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-9-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit fabdada9357b9cfd980c7744ddce47e34600bbef) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/sclp.c | 5 ++++ + include/hw/s390x/sclp.h | 8 ++++++ + target/s390x/cpu.h | 2 ++ + target/s390x/cpu_features.h | 1 + + target/s390x/cpu_features_def.inc.h | 3 +++ + target/s390x/cpu_models.c | 1 + + target/s390x/gen-features.c | 1 + + target/s390x/kvm.c | 39 +++++++++++++++++++++++++++++ + target/s390x/machine.c | 17 +++++++++++++ + 9 files changed, 77 insertions(+) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 8d111628e04..2931046f456 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -139,6 +139,11 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + s390_get_feat_block(S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, + read_info->conf_char_ext); + ++ if (s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB)) { ++ s390_get_feat_block(S390_FEAT_TYPE_SCLP_FAC134, ++ &read_info->fac134); ++ } ++ + read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO | + SCLP_HAS_IOA_RECONFIG); + +diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h +index 62e2aa1d9f1..addd904e5f4 100644 +--- a/include/hw/s390x/sclp.h ++++ b/include/hw/s390x/sclp.h +@@ -133,7 +133,15 @@ typedef struct ReadInfo { + uint16_t highest_cpu; + uint8_t _reserved5[124 - 122]; /* 122-123 */ + uint32_t hmfai; ++ uint8_t _reserved7[134 - 128]; /* 128-133 */ ++ uint8_t fac134; ++ uint8_t _reserved8[144 - 135]; /* 135-143 */ + struct CPUEntry entries[]; ++ /* ++ * When the Extended-Length SCCB (ELS) feature is enabled the ++ * start of the entries field begins at an offset denoted by the ++ * offset_cpu field, otherwise it's at an offset of 128. 
++ */ + } QEMU_PACKED ReadInfo; + + typedef struct ReadCpuInfo { +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index a48e655c4d4..1dc21cd311d 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -117,6 +117,8 @@ struct CPUS390XState { + uint16_t external_call_addr; + DECLARE_BITMAP(emergency_signals, S390_MAX_CPUS); + ++ uint64_t diag318_info; ++ + /* Fields up to this point are cleared by a CPU reset */ + struct {} end_reset_fields; + +diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h +index da695a8346e..f74f7fc3a11 100644 +--- a/target/s390x/cpu_features.h ++++ b/target/s390x/cpu_features.h +@@ -23,6 +23,7 @@ typedef enum { + S390_FEAT_TYPE_STFL, + S390_FEAT_TYPE_SCLP_CONF_CHAR, + S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, ++ S390_FEAT_TYPE_SCLP_FAC134, + S390_FEAT_TYPE_SCLP_CPU, + S390_FEAT_TYPE_MISC, + S390_FEAT_TYPE_PLO, +diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h +index 3548d65a69a..cf7e04ee44f 100644 +--- a/target/s390x/cpu_features_def.inc.h ++++ b/target/s390x/cpu_features_def.inc.h +@@ -122,6 +122,9 @@ DEF_FEAT(SIE_CMMA, "cmma", SCLP_CONF_CHAR_EXT, 1, "SIE: Collaborative-memory-man + DEF_FEAT(SIE_PFMFI, "pfmfi", SCLP_CONF_CHAR_EXT, 9, "SIE: PFMF interpretation facility") + DEF_FEAT(SIE_IBS, "ibs", SCLP_CONF_CHAR_EXT, 10, "SIE: Interlock-and-broadcast-suppression facility") + ++/* Features exposed via SCLP SCCB Facilities byte 134 (bit numbers relative to byte-134) */ ++DEF_FEAT(DIAG_318, "diag318", SCLP_FAC134, 0, "Control program name and version codes") ++ + /* Features exposed via SCLP CPU info. 
*/ + DEF_FEAT(SIE_F2, "sief2", SCLP_CPU, 4, "SIE: interception format 2 (Virtual SIE)") + DEF_FEAT(SIE_SKEY, "skey", SCLP_CPU, 5, "SIE: Storage-key facility") +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index be718220d79..bf6a3faba9e 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -823,6 +823,7 @@ static void check_consistency(const S390CPUModel *model) + { S390_FEAT_PTFF_STOE, S390_FEAT_MULTIPLE_EPOCH }, + { S390_FEAT_PTFF_STOUE, S390_FEAT_MULTIPLE_EPOCH }, + { S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, S390_FEAT_AP }, ++ { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB }, + }; + int i; + +diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c +index 6857f657fba..a1f0a6f3c6f 100644 +--- a/target/s390x/gen-features.c ++++ b/target/s390x/gen-features.c +@@ -523,6 +523,7 @@ static uint16_t full_GEN12_GA1[] = { + S390_FEAT_AP_FACILITIES_TEST, + S390_FEAT_AP, + S390_FEAT_EXTENDED_LENGTH_SCCB, ++ S390_FEAT_DIAG_318, + }; + + static uint16_t full_GEN12_GA2[] = { +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index ef437acb5c1..e5e190d21c9 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -105,6 +105,7 @@ + + #define DIAG_TIMEREVENT 0x288 + #define DIAG_IPL 0x308 ++#define DIAG_SET_CONTROL_PROGRAM_CODES 0x318 + #define DIAG_KVM_HYPERCALL 0x500 + #define DIAG_KVM_BREAKPOINT 0x501 + +@@ -602,6 +603,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ETOKEN; + } + ++ if (can_sync_regs(cs, KVM_SYNC_DIAG318)) { ++ cs->kvm_run->s.regs.diag318 = env->diag318_info; ++ cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318; ++ } ++ + /* Finally the prefix */ + if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { + cs->kvm_run->s.regs.prefix = env->psa; +@@ -741,6 +747,10 @@ int kvm_arch_get_registers(CPUState *cs) + } + } + ++ if (can_sync_regs(cs, KVM_SYNC_DIAG318)) { ++ env->diag318_info = cs->kvm_run->s.regs.diag318; ++ } ++ + return 0; + } + +@@ -1601,6 
+1611,27 @@ static int handle_sw_breakpoint(S390CPU *cpu, struct kvm_run *run) + return -ENOENT; + } + ++static void handle_diag_318(S390CPU *cpu, struct kvm_run *run) ++{ ++ uint64_t reg = (run->s390_sieic.ipa & 0x00f0) >> 4; ++ uint64_t diag318_info = run->s.regs.gprs[reg]; ++ ++ /* ++ * DIAG 318 can only be enabled with KVM support. As such, let's ++ * ensure a guest cannot execute this instruction erroneously. ++ */ ++ if (!s390_has_feat(S390_FEAT_DIAG_318)) { ++ kvm_s390_program_interrupt(cpu, PGM_SPECIFICATION); ++ } ++ ++ cpu->env.diag318_info = diag318_info; ++ ++ if (can_sync_regs(CPU(cpu), KVM_SYNC_DIAG318)) { ++ run->s.regs.diag318 = diag318_info; ++ run->kvm_dirty_regs |= KVM_SYNC_DIAG318; ++ } ++} ++ + #define DIAG_KVM_CODE_MASK 0x000000000000ffff + + static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) +@@ -1620,6 +1651,9 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) + case DIAG_IPL: + kvm_handle_diag_308(cpu, run); + break; ++ case DIAG_SET_CONTROL_PROGRAM_CODES: ++ handle_diag_318(cpu, run); ++ break; + case DIAG_KVM_HYPERCALL: + r = handle_hypercall(cpu, run); + break; +@@ -2449,6 +2483,11 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) + */ + set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features); + ++ /* DIAGNOSE 0x318 is not supported under protected virtualization */ ++ if (!s390_is_pv() && kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) { ++ set_bit(S390_FEAT_DIAG_318, model->features); ++ } ++ + /* strip of features that are not part of the maximum model */ + bitmap_and(model->features, model->features, model->def->full_feat, + S390_FEAT_MAX); +diff --git a/target/s390x/machine.c b/target/s390x/machine.c +index 549bb6c2808..5b4e82f1ab9 100644 +--- a/target/s390x/machine.c ++++ b/target/s390x/machine.c +@@ -234,6 +234,22 @@ const VMStateDescription vmstate_etoken = { + } + }; + ++static bool diag318_needed(void *opaque) ++{ ++ return 
s390_has_feat(S390_FEAT_DIAG_318); ++} ++ ++const VMStateDescription vmstate_diag318 = { ++ .name = "cpu/diag318", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = diag318_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.diag318_info, S390CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + const VMStateDescription vmstate_s390_cpu = { + .name = "cpu", + .post_load = cpu_post_load, +@@ -270,6 +286,7 @@ const VMStateDescription vmstate_s390_cpu = { + &vmstate_gscb, + &vmstate_bpbc, + &vmstate_etoken, ++ &vmstate_diag318, + NULL + }, + }; +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch b/SOURCES/kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch new file mode 100644 index 0000000..f0f25a5 --- /dev/null +++ b/SOURCES/kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch @@ -0,0 +1,163 @@ +From a0ad4344984c50939be8c99371af0988551fb776 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 20 Nov 2020 11:46:09 -0500 +Subject: [PATCH 17/18] s390/kvm: fix diag318 propagation and reset + functionality + +RH-Author: Thomas Huth +Message-id: <20201120114609.408610-2-thuth@redhat.com> +Patchwork-id: 99787 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] s390/kvm: fix diag318 propagation and reset functionality +Bugzilla: 1659412 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +The Control Program Name Code (CPNC) portion of the diag318 +info must be set within the SIE block of each VCPU in the +configuration. The handler will iterate through each VCPU +and dirty the diag318_info reg to be synced with KVM on a +subsequent sync_regs call. + +Additionally, the diag318 info resets must be handled via +userspace. As such, QEMU will reset this value for each +VCPU during a modified clear, load normal, and load clear +reset event. 
+ +Fixes: fabdada9357b ("s390: guest support for diagnose 0x318") +Signed-off-by: Collin Walling +Message-Id: <20201113221022.257054-1-walling@linux.ibm.com> +Reviewed-by: Thomas Huth +Reviewed-by: Janosch Frank +Signed-off-by: Cornelia Huck +(cherry picked from commit e2c6cd567422bfa563be026b9741a1854aecdc06) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 4 ++++ + target/s390x/cpu.c | 7 +++++++ + target/s390x/cpu.h | 1 + + target/s390x/kvm-stub.c | 4 ++++ + target/s390x/kvm.c | 22 +++++++++++++++++----- + target/s390x/kvm_s390x.h | 1 + + 6 files changed, 34 insertions(+), 5 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index e6ed13b649a..5905d2b7adc 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -489,6 +489,10 @@ static void s390_machine_reset(MachineState *machine) + default: + g_assert_not_reached(); + } ++ ++ CPU_FOREACH(t) { ++ run_on_cpu(t, s390_do_cpu_set_diag318, RUN_ON_CPU_HOST_ULONG(0)); ++ } + s390_ipl_clear_reset_request(); + } + +diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c +index 371b91b2d72..820cab96e12 100644 +--- a/target/s390x/cpu.c ++++ b/target/s390x/cpu.c +@@ -445,6 +445,13 @@ void s390_enable_css_support(S390CPU *cpu) + kvm_s390_enable_css_support(cpu); + } + } ++ ++void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg) ++{ ++ if (kvm_enabled()) { ++ kvm_s390_set_diag318(cs, arg.host_ulong); ++ } ++} + #endif + + static gchar *s390_gdb_arch_name(CPUState *cs) +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 1dc21cd311d..83a23a11b96 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -774,6 +774,7 @@ int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit); + void s390_set_max_pagesize(uint64_t pagesize, Error **errp); + void s390_cmma_reset(void); + void s390_enable_css_support(S390CPU *cpu); ++void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg); + int 
s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id, + int vq, bool assign); + #ifndef CONFIG_USER_ONLY +diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c +index aa185017a2a..9970b5a8c70 100644 +--- a/target/s390x/kvm-stub.c ++++ b/target/s390x/kvm-stub.c +@@ -120,3 +120,7 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) + void kvm_s390_restart_interrupt(S390CPU *cpu) + { + } ++ ++void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info) ++{ ++} +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 6edb52f6d25..8d4406124b9 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -1611,10 +1611,23 @@ static int handle_sw_breakpoint(S390CPU *cpu, struct kvm_run *run) + return -ENOENT; + } + ++void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info) ++{ ++ CPUS390XState *env = &S390_CPU(cs)->env; ++ ++ /* Feat bit is set only if KVM supports sync for diag318 */ ++ if (s390_has_feat(S390_FEAT_DIAG_318)) { ++ env->diag318_info = diag318_info; ++ cs->kvm_run->s.regs.diag318 = diag318_info; ++ cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318; ++ } ++} ++ + static void handle_diag_318(S390CPU *cpu, struct kvm_run *run) + { + uint64_t reg = (run->s390_sieic.ipa & 0x00f0) >> 4; + uint64_t diag318_info = run->s.regs.gprs[reg]; ++ CPUState *t; + + /* + * DIAG 318 can only be enabled with KVM support. 
As such, let's +@@ -1622,13 +1635,12 @@ static void handle_diag_318(S390CPU *cpu, struct kvm_run *run) + */ + if (!s390_has_feat(S390_FEAT_DIAG_318)) { + kvm_s390_program_interrupt(cpu, PGM_SPECIFICATION); ++ return; + } + +- cpu->env.diag318_info = diag318_info; +- +- if (can_sync_regs(CPU(cpu), KVM_SYNC_DIAG318)) { +- run->s.regs.diag318 = diag318_info; +- run->kvm_dirty_regs |= KVM_SYNC_DIAG318; ++ CPU_FOREACH(t) { ++ run_on_cpu(t, s390_do_cpu_set_diag318, ++ RUN_ON_CPU_HOST_ULONG(diag318_info)); + } + } + +diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h +index 6ab17c81b73..25bbe98b251 100644 +--- a/target/s390x/kvm_s390x.h ++++ b/target/s390x/kvm_s390x.h +@@ -45,5 +45,6 @@ void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp); + void kvm_s390_crypto_reset(void); + void kvm_s390_restart_interrupt(S390CPU *cpu); + void kvm_s390_stop_interrupt(S390CPU *cpu); ++void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info); + + #endif /* KVM_S390X_H */ +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch b/SOURCES/kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch new file mode 100644 index 0000000..c05f50c --- /dev/null +++ b/SOURCES/kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch @@ -0,0 +1,220 @@ +From e1a3684f9b08fa9db35331b5c5ad11879f512e90 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:11 -0500 +Subject: [PATCH 11/18] s390/sclp: add extended-length sccb support for kvm + guest + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-8-thuth@redhat.com> +Patchwork-id: 99504 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 07/12] s390/sclp: add extended-length sccb support for kvm guest +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +As more features and facilities are added to the Read SCP Info (RSCPI) +response, more space is required to store them. 
The space used to store +these new features intrudes on the space originally used to store CPU +entries. This means as more features and facilities are added to the +RSCPI response, less space can be used to store CPU entries. + +With the Extended-Length SCCB (ELS) facility, a KVM guest can execute +the RSCPI command and determine if the SCCB is large enough to store a +complete response. If it is not large enough, then the required length +will be set in the SCCB header. + +The caller of the SCLP command is responsible for creating a +large-enough SCCB to store a complete response. Proper checking should +be in place, and the caller should execute the command once-more with +the large-enough SCCB. + +This facility also enables an extended SCCB for the Read CPU Info +(RCPUI) command. + +When this facility is enabled, the boundary violation response cannot +be a result from the RSCPI, RSCPI Forced, or RCPUI commands. + +In order to tolerate kernels that do not yet have full support for this +feature, a "fixed" offset to the start of the CPU Entries within the +Read SCP Info struct is set to allow for the original 248 max entries +when this feature is disabled. + +Additionally, this is introduced as a CPU feature to protect the guest +from migrating to a machine that does not support storing an extended +SCCB. This could otherwise hinder the VM from being able to read all +available CPU entries after migration (such as during re-ipl). + +Signed-off-by: Collin Walling +Reviewed-by: Thomas Huth +Acked-by: Cornelia Huck +Reviewed-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-7-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 1ecd6078f587cfadda8edc93d45b5072e35f2d17) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L.
de Paula +--- + hw/s390x/sclp.c | 43 +++++++++++++++++++++++++---- + include/hw/s390x/sclp.h | 1 + + target/s390x/cpu_features_def.inc.h | 1 + + target/s390x/gen-features.c | 1 + + target/s390x/kvm.c | 8 ++++++ + 5 files changed, 48 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 017989b3888..8d111628e04 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -49,13 +49,30 @@ static inline bool sclp_command_code_valid(uint32_t code) + return false; + } + +-static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len) ++static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len, ++ uint32_t code) + { + uint64_t sccb_max_addr = sccb_addr + sccb_len - 1; + uint64_t sccb_boundary = (sccb_addr & PAGE_MASK) + PAGE_SIZE; + +- if (sccb_max_addr < sccb_boundary) { +- return true; ++ switch (code & SCLP_CMD_CODE_MASK) { ++ case SCLP_CMDW_READ_SCP_INFO: ++ case SCLP_CMDW_READ_SCP_INFO_FORCED: ++ case SCLP_CMDW_READ_CPU_INFO: ++ /* ++ * An extended-length SCCB is only allowed for Read SCP/CPU Info and ++ * is allowed to exceed the 4k boundary. The respective commands will ++ * set the length field to the required length if an insufficient ++ * SCCB length is provided. 
++ */ ++ if (s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB)) { ++ return true; ++ } ++ /* fallthrough */ ++ default: ++ if (sccb_max_addr < sccb_boundary) { ++ return true; ++ } + } + + return false; +@@ -80,6 +97,12 @@ static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) + + #define SCCB_REQ_LEN(s, max_cpus) (sizeof(s) + max_cpus * sizeof(CPUEntry)) + ++static inline bool ext_len_sccb_supported(SCCBHeader header) ++{ ++ return s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) && ++ header.control_mask[2] & SCLP_VARIABLE_LENGTH_RESPONSE; ++} ++ + /* Provide information about the configuration, CPUs and storage */ + static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + { +@@ -89,10 +112,15 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + int rnsize, rnmax; + IplParameterBlock *ipib = s390_ipl_get_iplb(); + int required_len = SCCB_REQ_LEN(ReadInfo, machine->possible_cpus->len); +- int offset_cpu = offsetof(ReadInfo, entries); ++ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
++ offsetof(ReadInfo, entries) : ++ SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; + CPUEntry *entries_start = (void *)sccb + offset_cpu; + + if (be16_to_cpu(sccb->h.length) < required_len) { ++ if (ext_len_sccb_supported(sccb->h)) { ++ sccb->h.length = cpu_to_be16(required_len); ++ } + sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); + return; + } +@@ -153,6 +181,9 @@ static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) + int required_len = SCCB_REQ_LEN(ReadCpuInfo, machine->possible_cpus->len); + + if (be16_to_cpu(sccb->h.length) < required_len) { ++ if (ext_len_sccb_supported(sccb->h)) { ++ sccb->h.length = cpu_to_be16(required_len); ++ } + sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); + return; + } +@@ -249,7 +280,7 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + goto out_write; + } + +- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { ++ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) { + work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); + goto out_write; + } +@@ -302,7 +333,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + goto out_write; + } + +- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { ++ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) { + work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); + goto out_write; + } +diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h +index 55f53a46540..df2fa4169b0 100644 +--- a/include/hw/s390x/sclp.h ++++ b/include/hw/s390x/sclp.h +@@ -110,6 +110,7 @@ typedef struct CPUEntry { + uint8_t reserved1; + } QEMU_PACKED CPUEntry; + ++#define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128 + typedef struct ReadInfo { + SCCBHeader h; + uint16_t rnmax; +diff --git a/target/s390x/cpu_features_def.inc.h b/target/s390x/cpu_features_def.inc.h +index 60db28351d0..3548d65a69a 100644 +--- 
a/target/s390x/cpu_features_def.inc.h ++++ b/target/s390x/cpu_features_def.inc.h +@@ -97,6 +97,7 @@ DEF_FEAT(GUARDED_STORAGE, "gs", STFL, 133, "Guarded-storage facility") + DEF_FEAT(VECTOR_PACKED_DECIMAL, "vxpd", STFL, 134, "Vector packed decimal facility") + DEF_FEAT(VECTOR_ENH, "vxeh", STFL, 135, "Vector enhancements facility") + DEF_FEAT(MULTIPLE_EPOCH, "mepoch", STFL, 139, "Multiple-epoch facility") ++DEF_FEAT(EXTENDED_LENGTH_SCCB, "els", STFL, 140, "Extended-length SCCB facility") + DEF_FEAT(TEST_PENDING_EXT_INTERRUPTION, "tpei", STFL, 144, "Test-pending-external-interruption facility") + DEF_FEAT(INSERT_REFERENCE_BITS_MULT, "irbm", STFL, 145, "Insert-reference-bits-multiple facility") + DEF_FEAT(MSA_EXT_8, "msa8-base", STFL, 146, "Message-security-assist-extension-8 facility (excluding subfunctions)") +diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c +index 8ddeebc5441..6857f657fba 100644 +--- a/target/s390x/gen-features.c ++++ b/target/s390x/gen-features.c +@@ -522,6 +522,7 @@ static uint16_t full_GEN12_GA1[] = { + S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, + S390_FEAT_AP_FACILITIES_TEST, + S390_FEAT_AP, ++ S390_FEAT_EXTENDED_LENGTH_SCCB, + }; + + static uint16_t full_GEN12_GA2[] = { +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 0bbf8f81b09..ef437acb5c1 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2441,6 +2441,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) + KVM_S390_VM_CRYPTO_ENABLE_APIE)) { + set_bit(S390_FEAT_AP, model->features); + } ++ ++ /* ++ * Extended-Length SCCB is handled entirely within QEMU. ++ * For PV guests this is completely fenced by the Ultravisor, as Service ++ * Call error checking and STFLE interpretation are handled via SIE. 
++ */ ++ set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features); ++ + /* strip of features that are not part of the maximum model */ + bitmap_and(model->features, model->features, model->def->full_feat, + S390_FEAT_MAX); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch b/SOURCES/kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch new file mode 100644 index 0000000..6efc35f --- /dev/null +++ b/SOURCES/kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch @@ -0,0 +1,106 @@ +From 6cc7c8dd7a6fac493c648c607bec4c38c0b275b6 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:09 -0500 +Subject: [PATCH 09/18] s390/sclp: check sccb len before filling in data + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-6-thuth@redhat.com> +Patchwork-id: 99502 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 05/12] s390/sclp: check sccb len before filling in data +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +The SCCB must be checked for a sufficient length before it is filled +with any data. If the length is insufficient, then the SCLP command +is suppressed and the proper response code is set in the SCCB header. + +While we're at it, let's cleanup the length check by placing the +calculation inside a macro. + +Fixes: 832be0d8a3bb ("s390x: sclp: Report insufficient SCCB length") +Signed-off-by: Collin Walling +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Cornelia Huck +Reviewed-by: Thomas Huth +Reviewed-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-5-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 0260b97824495ebfacfa8bbae0be10b0ef986bf6) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/sclp.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index cf1292beb22..2b4c6c5cfad 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -78,6 +78,8 @@ static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) + } + } + ++#define SCCB_REQ_LEN(s, max_cpus) (sizeof(s) + max_cpus * sizeof(CPUEntry)) ++ + /* Provide information about the configuration, CPUs and storage */ + static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + { +@@ -86,6 +88,12 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + int cpu_count; + int rnsize, rnmax; + IplParameterBlock *ipib = s390_ipl_get_iplb(); ++ int required_len = SCCB_REQ_LEN(ReadInfo, machine->possible_cpus->len); ++ ++ if (be16_to_cpu(sccb->h.length) < required_len) { ++ sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); ++ return; ++ } + + /* CPU information */ + prepare_cpu_entries(machine, read_info->entries, &cpu_count); +@@ -95,12 +103,6 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + + read_info->ibc_val = cpu_to_be32(s390_get_ibc_val()); + +- if (be16_to_cpu(sccb->h.length) < +- (sizeof(ReadInfo) + cpu_count * sizeof(CPUEntry))) { +- sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); +- return; +- } +- + /* Configuration Characteristic (Extension) */ + s390_get_feat_block(S390_FEAT_TYPE_SCLP_CONF_CHAR, + read_info->conf_char); +@@ -146,18 +148,18 @@ static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) + MachineState *machine = MACHINE(qdev_get_machine()); + ReadCpuInfo *cpu_info = (ReadCpuInfo *) sccb; + int cpu_count; ++ int required_len = SCCB_REQ_LEN(ReadCpuInfo, machine->possible_cpus->len); ++ ++ if (be16_to_cpu(sccb->h.length) < required_len) { ++ sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); ++ return; ++ } + + prepare_cpu_entries(machine, cpu_info->entries, &cpu_count); + 
cpu_info->nr_configured = cpu_to_be16(cpu_count); + cpu_info->offset_configured = cpu_to_be16(offsetof(ReadCpuInfo, entries)); + cpu_info->nr_standby = cpu_to_be16(0); + +- if (be16_to_cpu(sccb->h.length) < +- (sizeof(ReadCpuInfo) + cpu_count * sizeof(CPUEntry))) { +- sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); +- return; +- } +- + /* The standby offset is 16-byte for each CPU */ + cpu_info->offset_standby = cpu_to_be16(cpu_info->offset_configured + + cpu_info->nr_configured*sizeof(CPUEntry)); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch b/SOURCES/kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch new file mode 100644 index 0000000..09c72b6 --- /dev/null +++ b/SOURCES/kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch @@ -0,0 +1,75 @@ +From 44e8cdba29b932ee6fff7a2d00b09e6e78c3a0ef Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:06 -0500 +Subject: [PATCH 06/18] s390/sclp: get machine once during read scp/cpu info + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-3-thuth@redhat.com> +Patchwork-id: 99499 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 02/12] s390/sclp: get machine once during read scp/cpu info +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +Functions within read scp/cpu info will need access to the machine +state. Let's make a call to retrieve the machine state once and +pass the appropriate data to the respective functions. + +Signed-off-by: Collin Walling +Reviewed-by: David Hildenbrand +Reviewed-by: Thomas Huth +Reviewed-by: Janosch Frank +Reviewed-by: Cornelia Huck +Reviewed-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-2-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 912d70d2755cb9b3144eeed4014580ebc5485ce6) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/sclp.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index d8ae207731f..fe7d0fece80 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -49,9 +49,8 @@ static inline bool sclp_command_code_valid(uint32_t code) + return false; + } + +-static void prepare_cpu_entries(SCLPDevice *sclp, CPUEntry *entry, int *count) ++static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) + { +- MachineState *ms = MACHINE(qdev_get_machine()); + uint8_t features[SCCB_CPU_FEATURE_LEN] = { 0 }; + int i; + +@@ -77,7 +76,7 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + IplParameterBlock *ipib = s390_ipl_get_iplb(); + + /* CPU information */ +- prepare_cpu_entries(sclp, read_info->entries, &cpu_count); ++ prepare_cpu_entries(machine, read_info->entries, &cpu_count); + read_info->entries_cpu = cpu_to_be16(cpu_count); + read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); + read_info->highest_cpu = cpu_to_be16(machine->smp.max_cpus - 1); +@@ -132,10 +131,11 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + /* Provide information about the CPU */ + static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) + { ++ MachineState *machine = MACHINE(qdev_get_machine()); + ReadCpuInfo *cpu_info = (ReadCpuInfo *) sccb; + int cpu_count; + +- prepare_cpu_entries(sclp, cpu_info->entries, &cpu_count); ++ prepare_cpu_entries(machine, cpu_info->entries, &cpu_count); + cpu_info->nr_configured = cpu_to_be16(cpu_count); + cpu_info->offset_configured = cpu_to_be16(offsetof(ReadCpuInfo, entries)); + cpu_info->nr_standby = cpu_to_be16(0); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch b/SOURCES/kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch new file mode 100644 index 0000000..adb65c7 --- /dev/null +++ b/SOURCES/kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch @@ -0,0 +1,170 @@ 
+From 212c129b82f0a53725a4167303de2ee0a865f82d Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:08 -0500 +Subject: [PATCH 08/18] s390/sclp: read sccb from mem based on provided length + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-5-thuth@redhat.com> +Patchwork-id: 99501 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 04/12] s390/sclp: read sccb from mem based on provided length +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +The header contained within the SCCB passed to the SCLP service call +contains the actual length of the SCCB. Instead of allocating a static +4K size for the work sccb, let's allow for a variable size determined +by the value in the header. The proper checks are already in place to +ensure the SCCB length is sufficient to store a full response and that +the length does not cross any explicitly-set boundaries. + +Signed-off-by: Collin Walling +Reviewed-by: Thomas Huth +Reviewed-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-4-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit c1db53a5910f988eeb32f031c53a50f3373fd824) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/event-facility.c | 2 +- + hw/s390x/sclp.c | 55 ++++++++++++++++++++++----------------- + include/hw/s390x/sclp.h | 2 +- + 3 files changed, 33 insertions(+), 26 deletions(-) + +diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c +index 66205697ae7..8aa7017f06b 100644 +--- a/hw/s390x/event-facility.c ++++ b/hw/s390x/event-facility.c +@@ -215,7 +215,7 @@ static uint16_t handle_sccb_read_events(SCLPEventFacility *ef, SCCB *sccb, + + event_buf = &red->ebh; + event_buf->length = 0; +- slen = sizeof(sccb->data); ++ slen = sccb_data_len(sccb); + + rc = SCLP_RC_NO_EVENT_BUFFERS_STORED; + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 38278497319..cf1292beb22 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -231,25 +231,29 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + { + SCLPDevice *sclp = get_sclp_device(); + SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); +- SCCB work_sccb; +- hwaddr sccb_len = sizeof(SCCB); ++ SCCBHeader header; ++ g_autofree SCCB *work_sccb = NULL; + +- s390_cpu_pv_mem_read(env_archcpu(env), 0, &work_sccb, sccb_len); ++ s390_cpu_pv_mem_read(env_archcpu(env), 0, &header, sizeof(SCCBHeader)); ++ ++ work_sccb = g_malloc0(be16_to_cpu(header.length)); ++ s390_cpu_pv_mem_read(env_archcpu(env), 0, work_sccb, ++ be16_to_cpu(header.length)); + + if (!sclp_command_code_valid(code)) { +- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); ++ work_sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); + goto out_write; + } + +- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { +- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); ++ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { ++ work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); + goto out_write; + } + +- sclp_c->execute(sclp, &work_sccb, code); ++ sclp_c->execute(sclp, work_sccb, code); + out_write: +- 
s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, +- be16_to_cpu(work_sccb.h.length)); ++ s390_cpu_pv_mem_write(env_archcpu(env), 0, work_sccb, ++ be16_to_cpu(work_sccb->h.length)); + sclp_c->service_interrupt(sclp, SCLP_PV_DUMMY_ADDR); + return 0; + } +@@ -258,9 +262,8 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + { + SCLPDevice *sclp = get_sclp_device(); + SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); +- SCCB work_sccb; +- +- hwaddr sccb_len = sizeof(SCCB); ++ SCCBHeader header; ++ g_autofree SCCB *work_sccb = NULL; + + /* first some basic checks on program checks */ + if (env->psw.mask & PSW_MASK_PSTATE) { +@@ -274,32 +277,36 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + return -PGM_SPECIFICATION; + } + ++ /* the header contains the actual length of the sccb */ ++ cpu_physical_memory_read(sccb, &header, sizeof(SCCBHeader)); ++ ++ /* Valid sccb sizes */ ++ if (be16_to_cpu(header.length) < sizeof(SCCBHeader)) { ++ return -PGM_SPECIFICATION; ++ } ++ + /* + * we want to work on a private copy of the sccb, to prevent guests + * from playing dirty tricks by modifying the memory content after + * the host has checked the values + */ +- cpu_physical_memory_read(sccb, &work_sccb, sccb_len); +- +- /* Valid sccb sizes */ +- if (be16_to_cpu(work_sccb.h.length) < sizeof(SCCBHeader)) { +- return -PGM_SPECIFICATION; +- } ++ work_sccb = g_malloc0(be16_to_cpu(header.length)); ++ cpu_physical_memory_read(sccb, work_sccb, be16_to_cpu(header.length)); + + if (!sclp_command_code_valid(code)) { +- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); ++ work_sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); + goto out_write; + } + +- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { +- work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); ++ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length))) { ++ work_sccb->h.response_code = 
cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); + goto out_write; + } + +- sclp_c->execute(sclp, &work_sccb, code); ++ sclp_c->execute(sclp, work_sccb, code); + out_write: +- cpu_physical_memory_write(sccb, &work_sccb, +- be16_to_cpu(work_sccb.h.length)); ++ cpu_physical_memory_write(sccb, work_sccb, ++ be16_to_cpu(work_sccb->h.length)); + + sclp_c->service_interrupt(sclp, sccb); + +diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h +index c0a3faa37d7..55f53a46540 100644 +--- a/include/hw/s390x/sclp.h ++++ b/include/hw/s390x/sclp.h +@@ -177,7 +177,7 @@ typedef struct IoaCfgSccb { + + typedef struct SCCB { + SCCBHeader h; +- char data[SCCB_DATA_LEN]; ++ char data[]; + } QEMU_PACKED SCCB; + + #define TYPE_SCLP "sclp" +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-rework-sclp-boundary-checks.patch b/SOURCES/kvm-s390-sclp-rework-sclp-boundary-checks.patch new file mode 100644 index 0000000..9bb3a55 --- /dev/null +++ b/SOURCES/kvm-s390-sclp-rework-sclp-boundary-checks.patch @@ -0,0 +1,80 @@ +From bc395a979a00bb3e16f3bd92b5b2006db4a5aee3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:07 -0500 +Subject: [PATCH 07/18] s390/sclp: rework sclp boundary checks + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-4-thuth@redhat.com> +Patchwork-id: 99500 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 03/12] s390/sclp: rework sclp boundary checks +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +Rework the SCLP boundary check to account for different SCLP commands +(eventually) allowing different boundary sizes. 
+ +Signed-off-by: Collin Walling +Reviewed-by: Cornelia Huck +Reviewed-by: Thomas Huth +Acked-by: Janosch Frank +Reviewed-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-3-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit db13387ca01a69d870cc16dd232375c2603596f2) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/sclp.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index fe7d0fece80..38278497319 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -49,6 +49,18 @@ static inline bool sclp_command_code_valid(uint32_t code) + return false; + } + ++static bool sccb_verify_boundary(uint64_t sccb_addr, uint16_t sccb_len) ++{ ++ uint64_t sccb_max_addr = sccb_addr + sccb_len - 1; ++ uint64_t sccb_boundary = (sccb_addr & PAGE_MASK) + PAGE_SIZE; ++ ++ if (sccb_max_addr < sccb_boundary) { ++ return true; ++ } ++ ++ return false; ++} ++ + static void prepare_cpu_entries(MachineState *ms, CPUEntry *entry, int *count) + { + uint8_t features[SCCB_CPU_FEATURE_LEN] = { 0 }; +@@ -229,6 +241,11 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + goto out_write; + } + ++ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { ++ work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); ++ goto out_write; ++ } ++ + sclp_c->execute(sclp, &work_sccb, code); + out_write: + s390_cpu_pv_mem_write(env_archcpu(env), 0, &work_sccb, +@@ -274,7 +291,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + goto out_write; + } + +- if ((sccb + be16_to_cpu(work_sccb.h.length)) > ((sccb & PAGE_MASK) + PAGE_SIZE)) { ++ if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb.h.length))) { + work_sccb.h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); + goto out_write; + } +-- +2.27.0 + diff --git a/SOURCES/kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch 
b/SOURCES/kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch new file mode 100644 index 0000000..cb99830 --- /dev/null +++ b/SOURCES/kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch @@ -0,0 +1,67 @@ +From adf66c037e60d66f864960b24c746b767efb10b9 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:10 -0500 +Subject: [PATCH 10/18] s390/sclp: use cpu offset to locate cpu entries + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-7-thuth@redhat.com> +Patchwork-id: 99503 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 06/12] s390/sclp: use cpu offset to locate cpu entries +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Collin Walling + +The start of the CPU entry region in the Read SCP Info response data is +denoted by the offset_cpu field. As such, QEMU needs to begin creating +entries at this address. + +This is in preparation for when Read SCP Info inevitably introduces new +bytes that push the start of the CPUEntry field further away. + +Read CPU Info is unlikely to ever change, so let's not bother +accounting for the offset there. + +Signed-off-by: Collin Walling +Reviewed-by: Thomas Huth +Reviewed-by: Cornelia Huck +Reviewed-by: Claudio Imbrenda +Message-Id: <20200915194416.107460-6-walling@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 1a7a568859473b1cda39a015493c5c82bb200281) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/sclp.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 2b4c6c5cfad..017989b3888 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -89,6 +89,8 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + int rnsize, rnmax; + IplParameterBlock *ipib = s390_ipl_get_iplb(); + int required_len = SCCB_REQ_LEN(ReadInfo, machine->possible_cpus->len); ++ int offset_cpu = offsetof(ReadInfo, entries); ++ CPUEntry *entries_start = (void *)sccb + offset_cpu; + + if (be16_to_cpu(sccb->h.length) < required_len) { + sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH); +@@ -96,9 +98,9 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) + } + + /* CPU information */ +- prepare_cpu_entries(machine, read_info->entries, &cpu_count); ++ prepare_cpu_entries(machine, entries_start, &cpu_count); + read_info->entries_cpu = cpu_to_be16(cpu_count); +- read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); ++ read_info->offset_cpu = cpu_to_be16(offset_cpu); + read_info->highest_cpu = cpu_to_be16(machine->smp.max_cpus - 1); + + read_info->ibc_val = cpu_to_be32(s390_get_ibc_val()); +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-fix-build-for-without-default-devices.patch b/SOURCES/kvm-s390x-fix-build-for-without-default-devices.patch new file mode 100644 index 0000000..6567c04 --- /dev/null +++ b/SOURCES/kvm-s390x-fix-build-for-without-default-devices.patch @@ -0,0 +1,74 @@ +From d86158eeb752242791e3f94172ed020204040250 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:46 -0500 +Subject: [PATCH 7/7] s390x: fix build for --without-default-devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-8-cohuck@redhat.com> +Patchwork-id: 100681 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 7/7] s390x: fix build for --without-default-devices 
+Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +s390-pci-vfio.c calls into the vfio code, so we need it to be +built conditionally on vfio (which implies CONFIG_LINUX). + +Fixes: cd7498d07fbb ("s390x/pci: Add routine to get the vfio dma available count") +Reported-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Matthew Rosato +Message-Id: <20201103123237.718242-1-cohuck@redhat.com> +Acked-by: Greg Kurz +Tested-by: Greg Kurz +Signed-off-by: Cornelia Huck +(cherry picked from commit 77280d33bc9cfdbfb5b5d462259d644f5aefe9b3) +Signed-off-by: Cornelia Huck + + Conflicts: + hw/s390x/meson.build + include/hw/s390x/s390-pci-vfio.h + --> adaptions due to missing Meson rework + +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/Makefile.objs | 2 +- + include/hw/s390x/s390-pci-vfio.h | 3 ++- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs +index 43756c9437d..dbef4b8906c 100644 +--- a/hw/s390x/Makefile.objs ++++ b/hw/s390x/Makefile.objs +@@ -7,7 +7,7 @@ obj-y += ipl.o + obj-y += css.o + obj-$(CONFIG_S390_CCW_VIRTIO) += s390-virtio-ccw.o + obj-$(CONFIG_TERMINAL3270) += 3270-ccw.o +-obj-$(CONFIG_LINUX) += s390-pci-vfio.o ++obj-$(CONFIG_VFIO) += s390-pci-vfio.o + ifeq ($(CONFIG_VIRTIO_CCW),y) + obj-y += virtio-ccw.o + obj-$(CONFIG_VIRTIO_SERIAL) += virtio-ccw-serial.o +diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h +index 539bcf04eb5..685b136d46b 100644 +--- a/include/hw/s390x/s390-pci-vfio.h ++++ b/include/hw/s390x/s390-pci-vfio.h +@@ -13,8 +13,9 @@ + #define HW_S390_PCI_VFIO_H + + #include "hw/s390x/s390-pci-bus.h" ++#include "config-devices.h" + +-#ifdef CONFIG_LINUX ++#ifdef CONFIG_VFIO + bool s390_pci_update_dma_avail(int fd, unsigned int *avail); + S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + S390PCIBusDevice *pbdev); +-- +2.27.0 + diff 
--git a/SOURCES/kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch b/SOURCES/kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch new file mode 100644 index 0000000..5e48efb --- /dev/null +++ b/SOURCES/kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch @@ -0,0 +1,150 @@ +From 3927f54a56e29003b84e0e3726d3a0170681128b Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:44 -0500 +Subject: [PATCH 5/7] s390x/pci: Add routine to get the vfio dma available + count + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-6-cohuck@redhat.com> +Patchwork-id: 100679 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 5/7] s390x/pci: Add routine to get the vfio dma available count +Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +From: Matthew Rosato + +Create new files for separating out vfio-specific work for s390 +pci. Add the first such routine, which issues VFIO_IOMMU_GET_INFO +ioctl to collect the current dma available count. + +Signed-off-by: Matthew Rosato +Reviewed-by: Cornelia Huck +[aw: Fix non-Linux build with CONFIG_LINUX] +Signed-off-by: Alex Williamson +(cherry picked from commit cd7498d07fbb20fa04790ff7ee168a8a8d01cb30) +Signed-off-by: Cornelia Huck + + Conflicts: + hw/s390x/meson.build + --> added the file in hw/s390x/Makefile.objs instead, + since we do not use Meson yet + hw/s390x/s390-pci-vfio.c + --> NULL-initialize "info" to avoid a downstream-only + compiler warning + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/Makefile.objs | 1 + + hw/s390x/s390-pci-vfio.c | 54 ++++++++++++++++++++++++++++++++ + include/hw/s390x/s390-pci-vfio.h | 24 ++++++++++++++ + 3 files changed, 79 insertions(+) + create mode 100644 hw/s390x/s390-pci-vfio.c + create mode 100644 include/hw/s390x/s390-pci-vfio.h + +diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs +index c4086ec3171..43756c9437d 100644 +--- a/hw/s390x/Makefile.objs ++++ b/hw/s390x/Makefile.objs +@@ -7,6 +7,7 @@ obj-y += ipl.o + obj-y += css.o + obj-$(CONFIG_S390_CCW_VIRTIO) += s390-virtio-ccw.o + obj-$(CONFIG_TERMINAL3270) += 3270-ccw.o ++obj-$(CONFIG_LINUX) += s390-pci-vfio.o + ifeq ($(CONFIG_VIRTIO_CCW),y) + obj-y += virtio-ccw.o + obj-$(CONFIG_VIRTIO_SERIAL) += virtio-ccw-serial.o +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +new file mode 100644 +index 00000000000..0eb22ffec4c +--- /dev/null ++++ b/hw/s390x/s390-pci-vfio.c +@@ -0,0 +1,54 @@ ++/* ++ * s390 vfio-pci interfaces ++ * ++ * Copyright 2020 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#include ++ ++#include "qemu/osdep.h" ++#include "hw/s390x/s390-pci-vfio.h" ++#include "hw/vfio/vfio-common.h" ++ ++/* ++ * Get the current DMA available count from vfio. Returns true if vfio is ++ * limiting DMA requests, false otherwise. The current available count read ++ * from vfio is returned in avail. ++ */ ++bool s390_pci_update_dma_avail(int fd, unsigned int *avail) ++{ ++ g_autofree struct vfio_iommu_type1_info *info = NULL; ++ uint32_t argsz; ++ ++ assert(avail); ++ ++ argsz = sizeof(struct vfio_iommu_type1_info); ++ info = g_malloc0(argsz); ++ ++ /* ++ * If the specified argsz is not large enough to contain all capabilities ++ * it will be updated upon return from the ioctl. 
Retry until we have ++ * a big enough buffer to hold the entire capability chain. ++ */ ++retry: ++ info->argsz = argsz; ++ ++ if (ioctl(fd, VFIO_IOMMU_GET_INFO, info)) { ++ return false; ++ } ++ ++ if (info->argsz > argsz) { ++ argsz = info->argsz; ++ info = g_realloc(info, argsz); ++ goto retry; ++ } ++ ++ /* If the capability exists, update with the current value */ ++ return vfio_get_info_dma_avail(info, avail); ++} ++ +diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h +new file mode 100644 +index 00000000000..1727292e9b5 +--- /dev/null ++++ b/include/hw/s390x/s390-pci-vfio.h +@@ -0,0 +1,24 @@ ++/* ++ * s390 vfio-pci interfaces ++ * ++ * Copyright 2020 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#ifndef HW_S390_PCI_VFIO_H ++#define HW_S390_PCI_VFIO_H ++ ++#ifdef CONFIG_LINUX ++bool s390_pci_update_dma_avail(int fd, unsigned int *avail); ++#else ++static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) ++{ ++ return false; ++} ++#endif ++ ++#endif +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch b/SOURCES/kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch new file mode 100644 index 0000000..13fd6b7 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch @@ -0,0 +1,357 @@ +From 7ef9b9c593da98ad32ad20c28d17bb2700a35c29 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:45 -0500 +Subject: [PATCH 6/7] s390x/pci: Honor DMA limits set by vfio + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-7-cohuck@redhat.com> +Patchwork-id: 100680 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 6/7] s390x/pci: Honor DMA limits set by vfio +Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +From: Matthew Rosato + +When an 
s390 guest is using lazy unmapping, it can result in a very +large number of outstanding DMA requests, far beyond the default +limit configured for vfio. Let's track DMA usage similar to vfio +in the host, and trigger the guest to flush their DMA mappings +before vfio runs out. + +Signed-off-by: Matthew Rosato +Reviewed-by: Cornelia Huck +[aw: non-Linux build fixes] +Signed-off-by: Alex Williamson +(cherry picked from commit 37fa32de707340f3a93959ad5a1ebc41ba1520ee) +Signed-off-by: Cornelia Huck + + Conflicts: + hw/s390x/s390-pci-bus.c + --> adapt to missing 981c3dcd9489 ("qdev: Convert to + qdev_unrealize() with Coccinelle") + hw/s390x/s390-pci-inst.c + --> adapt to out of order inclusion of 5039caf3c449 ("memory: + Add IOMMUTLBEvent") + include/hw/s390x/s390-pci-bus.h + --> adapt to missing db1015e92e04 ("Move QOM typedefs and + add missing includes") + +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-pci-bus.c | 16 ++++++++---- + hw/s390x/s390-pci-inst.c | 45 +++++++++++++++++++++++++++----- + hw/s390x/s390-pci-vfio.c | 42 +++++++++++++++++++++++++++++ + include/hw/s390x/s390-pci-bus.h | 9 +++++++ + include/hw/s390x/s390-pci-inst.h | 3 +++ + include/hw/s390x/s390-pci-vfio.h | 12 +++++++++ + 6 files changed, 116 insertions(+), 11 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 6daef2b6d57..a9f6f550472 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -17,6 +17,7 @@ + #include "cpu.h" + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-vfio.h" + #include "hw/pci/pci_bus.h" + #include "hw/qdev-properties.h" + #include "hw/pci/pci_bridge.h" +@@ -771,6 +772,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) + s->bus_no = 0; + QTAILQ_INIT(&s->pending_sei); + QTAILQ_INIT(&s->zpci_devs); ++ QTAILQ_INIT(&s->zpci_dma_limit); + + css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, + S390_ADAPTER_SUPPRESSIBLE, &local_err); +@@ 
-951,17 +953,18 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } + } + ++ pbdev->pdev = pdev; ++ pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); ++ pbdev->iommu->pbdev = pbdev; ++ pbdev->state = ZPCI_FS_DISABLED; ++ + if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { + pbdev->fh |= FH_SHM_VFIO; ++ pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); + } else { + pbdev->fh |= FH_SHM_EMUL; + } + +- pbdev->pdev = pdev; +- pbdev->iommu = s390_pci_get_iommu(s, pci_get_bus(pdev), pdev->devfn); +- pbdev->iommu->pbdev = pbdev; +- pbdev->state = ZPCI_FS_DISABLED; +- + if (s390_pci_msix_init(pbdev)) { + error_setg(errp, "MSI-X support is mandatory " + "in the S390 architecture"); +@@ -1014,6 +1017,9 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->fid = 0; + QTAILQ_REMOVE(&s->zpci_devs, pbdev, link); + g_hash_table_remove(s->zpci_table, &pbdev->idx); ++ if (pbdev->iommu->dma_limit) { ++ s390_pci_end_dma_count(s, pbdev->iommu->dma_limit); ++ } + object_property_set_bool(OBJECT(dev), false, "realized", NULL); + } + } +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index b1885344f18..edbdf727984 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -32,6 +32,20 @@ + } \ + } while (0) + ++static inline void inc_dma_avail(S390PCIIOMMU *iommu) ++{ ++ if (iommu->dma_limit) { ++ iommu->dma_limit->avail++; ++ } ++} ++ ++static inline void dec_dma_avail(S390PCIIOMMU *iommu) ++{ ++ if (iommu->dma_limit) { ++ iommu->dma_limit->avail--; ++ } ++} ++ + static void s390_set_status_code(CPUS390XState *env, + uint8_t r, uint64_t status_code) + { +@@ -572,7 +586,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + return 0; + } + +-static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) ++static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, ++ S390IOTLBEntry *entry) + { + S390IOTLBEntry 
*cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); + IOMMUTLBEvent event = { +@@ -588,14 +603,15 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) + + if (event.type == IOMMU_NOTIFIER_UNMAP) { + if (!cache) { +- return; ++ goto out; + } + g_hash_table_remove(iommu->iotlb, &entry->iova); ++ inc_dma_avail(iommu); + } else { + if (cache) { + if (cache->perm == entry->perm && + cache->translated_addr == entry->translated_addr) { +- return; ++ goto out; + } + + event.type = IOMMU_NOTIFIER_UNMAP; +@@ -611,9 +627,13 @@ static void s390_pci_update_iotlb(S390PCIIOMMU *iommu, S390IOTLBEntry *entry) + cache->len = PAGE_SIZE; + cache->perm = entry->perm; + g_hash_table_replace(iommu->iotlb, &cache->iova, cache); ++ dec_dma_avail(iommu); + } + + memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ ++out: ++ return iommu->dma_limit ? iommu->dma_limit->avail : 1; + } + + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) +@@ -625,6 +645,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + S390PCIIOMMU *iommu; + S390IOTLBEntry entry; + hwaddr start, end; ++ uint32_t dma_avail; + + if (env->psw.mask & PSW_MASK_PSTATE) { + s390_program_interrupt(env, PGM_PRIVILEGED, ra); +@@ -663,6 +684,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + + iommu = pbdev->iommu; ++ if (iommu->dma_limit) { ++ dma_avail = iommu->dma_limit->avail; ++ } else { ++ dma_avail = 1; ++ } + if (!iommu->g_iota) { + error = ERR_EVENT_INVALAS; + goto err; +@@ -680,8 +706,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + + start += entry.len; +- while (entry.iova < start && entry.iova < end) { +- s390_pci_update_iotlb(iommu, &entry); ++ while (entry.iova < start && entry.iova < end && ++ (dma_avail > 0 || entry.perm == IOMMU_NONE)) { ++ dma_avail = s390_pci_update_iotlb(iommu, &entry); + entry.iova += PAGE_SIZE; + entry.translated_addr += PAGE_SIZE; 
+ } +@@ -694,7 +721,13 @@ err: + s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0); + } else { + pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++; +- setcc(cpu, ZPCI_PCI_LS_OK); ++ if (dma_avail > 0) { ++ setcc(cpu, ZPCI_PCI_LS_OK); ++ } else { ++ /* vfio DMA mappings are exhausted, trigger a RPCIT */ ++ setcc(cpu, ZPCI_PCI_LS_ERR); ++ s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES); ++ } + } + return 0; + } +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 0eb22ffec4c..01c1e8ac89a 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -12,7 +12,9 @@ + #include + + #include "qemu/osdep.h" ++#include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-vfio.h" ++#include "hw/vfio/pci.h" + #include "hw/vfio/vfio-common.h" + + /* +@@ -52,3 +54,43 @@ retry: + return vfio_get_info_dma_avail(info, avail); + } + ++S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, ++ S390PCIBusDevice *pbdev) ++{ ++ S390PCIDMACount *cnt; ++ uint32_t avail; ++ VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ int id; ++ ++ assert(vpdev); ++ ++ id = vpdev->vbasedev.group->container->fd; ++ ++ if (!s390_pci_update_dma_avail(id, &avail)) { ++ return NULL; ++ } ++ ++ QTAILQ_FOREACH(cnt, &s->zpci_dma_limit, link) { ++ if (cnt->id == id) { ++ cnt->users++; ++ return cnt; ++ } ++ } ++ ++ cnt = g_new0(S390PCIDMACount, 1); ++ cnt->id = id; ++ cnt->users = 1; ++ cnt->avail = avail; ++ QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); ++ return cnt; ++} ++ ++void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt) ++{ ++ assert(cnt); ++ ++ cnt->users--; ++ if (cnt->users == 0) { ++ QTAILQ_REMOVE(&s->zpci_dma_limit, cnt, link); ++ } ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 550f3cc5e92..2f2edbd0bf3 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -266,6 +266,13 @@ typedef struct S390IOTLBEntry { + } 
S390IOTLBEntry; + + typedef struct S390PCIBusDevice S390PCIBusDevice; ++typedef struct S390PCIDMACount { ++ int id; ++ int users; ++ uint32_t avail; ++ QTAILQ_ENTRY(S390PCIDMACount) link; ++} S390PCIDMACount; ++ + typedef struct S390PCIIOMMU { + Object parent_obj; + S390PCIBusDevice *pbdev; +@@ -277,6 +284,7 @@ typedef struct S390PCIIOMMU { + uint64_t pba; + uint64_t pal; + GHashTable *iotlb; ++ S390PCIDMACount *dma_limit; + } S390PCIIOMMU; + + typedef struct S390PCIIOMMUTable { +@@ -352,6 +360,7 @@ typedef struct S390pciState { + GHashTable *zpci_table; + QTAILQ_HEAD(, SeiContainer) pending_sei; + QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; ++ QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; + } S390pciState; + + S390pciState *s390_get_phb(void); +diff --git a/include/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h +index fa3bf8b5aad..8ee3a3c2375 100644 +--- a/include/hw/s390x/s390-pci-inst.h ++++ b/include/hw/s390x/s390-pci-inst.h +@@ -254,6 +254,9 @@ typedef struct ClpReqRspQueryPciGrp { + #define ZPCI_STPCIFC_ST_INVAL_DMAAS 28 + #define ZPCI_STPCIFC_ST_ERROR_RECOVER 40 + ++/* Refresh PCI Translations status codes */ ++#define ZPCI_RPCIT_ST_INSUFF_RES 16 ++ + /* FIB function controls */ + #define ZPCI_FIB_FC_ENABLED 0x80 + #define ZPCI_FIB_FC_ERROR 0x40 +diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h +index 1727292e9b5..539bcf04eb5 100644 +--- a/include/hw/s390x/s390-pci-vfio.h ++++ b/include/hw/s390x/s390-pci-vfio.h +@@ -12,13 +12,25 @@ + #ifndef HW_S390_PCI_VFIO_H + #define HW_S390_PCI_VFIO_H + ++#include "hw/s390x/s390-pci-bus.h" ++ + #ifdef CONFIG_LINUX + bool s390_pci_update_dma_avail(int fd, unsigned int *avail); ++S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, ++ S390PCIBusDevice *pbdev); ++void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt); + #else + static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) + { + return false; + } ++static inline S390PCIDMACount 
*s390_pci_start_dma_count(S390pciState *s, ++ S390PCIBusDevice *pbdev) ++{ ++ return NULL; ++} ++static inline void s390_pci_end_dma_count(S390pciState *s, ++ S390PCIDMACount *cnt) { } + #endif + + #endif +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch b/SOURCES/kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch new file mode 100644 index 0000000..27e5fa2 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch @@ -0,0 +1,110 @@ +From 73fb2438518ef2073f2486fcf1dd8cddffb29228 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:41 -0500 +Subject: [PATCH 2/7] s390x/pci: Move header files to include/hw/s390x + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-3-cohuck@redhat.com> +Patchwork-id: 100676 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/7] s390x/pci: Move header files to include/hw/s390x +Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +From: Matthew Rosato + +Seems a more appropriate location for them. + +Signed-off-by: Matthew Rosato +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +(cherry picked from commit 408b55db8be3e3edae041d46ef8786fabc1476aa) +Signed-off-by: Cornelia Huck + + Conflicts: + hw/s390x/s390-virtio-ccw.c + --> context diff + +Signed-off-by: Danilo C. L. 
de Paula +--- + MAINTAINERS | 1 + + hw/s390x/s390-pci-bus.c | 4 ++-- + hw/s390x/s390-pci-inst.c | 4 ++-- + hw/s390x/s390-virtio-ccw.c | 2 +- + {hw => include/hw}/s390x/s390-pci-bus.h | 0 + {hw => include/hw}/s390x/s390-pci-inst.h | 0 + 6 files changed, 6 insertions(+), 5 deletions(-) + rename {hw => include/hw}/s390x/s390-pci-bus.h (100%) + rename {hw => include/hw}/s390x/s390-pci-inst.h (100%) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 2742c955754..56ca8193d86 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1225,6 +1225,7 @@ S390 PCI + M: Matthew Rosato + S: Supported + F: hw/s390x/s390-pci* ++F: include/hw/s390x/s390-pci* + L: qemu-s390x@nongnu.org + + UniCore32 Machines +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 2d2f4a7c419..6daef2b6d57 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -15,8 +15,8 @@ + #include "qapi/error.h" + #include "qapi/visitor.h" + #include "cpu.h" +-#include "s390-pci-bus.h" +-#include "s390-pci-inst.h" ++#include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-inst.h" + #include "hw/pci/pci_bus.h" + #include "hw/qdev-properties.h" + #include "hw/pci/pci_bridge.h" +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 27b189e6d75..b1885344f18 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -13,12 +13,12 @@ + + #include "qemu/osdep.h" + #include "cpu.h" +-#include "s390-pci-inst.h" +-#include "s390-pci-bus.h" + #include "exec/memop.h" + #include "exec/memory-internal.h" + #include "qemu/error-report.h" + #include "sysemu/hw_accel.h" ++#include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/tod.h" + + #ifndef DEBUG_S390PCI_INST +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 5b3d07f55c4..101f3b7c6e1 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -27,7 +27,7 @@ + #include "qemu/ctype.h" + #include "qemu/error-report.h" + #include 
"qemu/option.h" +-#include "s390-pci-bus.h" ++#include "hw/s390x/s390-pci-bus.h" + #include "sysemu/reset.h" + #include "hw/s390x/storage-keys.h" + #include "hw/s390x/storage-attributes.h" +diff --git a/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +similarity index 100% +rename from hw/s390x/s390-pci-bus.h +rename to include/hw/s390x/s390-pci-bus.h +diff --git a/hw/s390x/s390-pci-inst.h b/include/hw/s390x/s390-pci-inst.h +similarity index 100% +rename from hw/s390x/s390-pci-inst.h +rename to include/hw/s390x/s390-pci-inst.h +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pv-Fix-diag318-PV-fencing.patch b/SOURCES/kvm-s390x-pv-Fix-diag318-PV-fencing.patch new file mode 100644 index 0000000..4dcb862 --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Fix-diag318-PV-fencing.patch @@ -0,0 +1,114 @@ +From 722078f9fdb766c2f0990145de6732f0c36a63b7 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:16 -0500 +Subject: [PATCH 16/18] s390x: pv: Fix diag318 PV fencing + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-13-thuth@redhat.com> +Patchwork-id: 99509 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 12/12] s390x: pv: Fix diag318 PV fencing +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +Diag318 fencing needs to be determined on the current VM PV state and +not on the state that the VM has when we create the CPU model. + +Fixes: fabdada935 ("s390: guest support for diagnose 0x318") +Reported-by: Marc Hartmayer +Signed-off-by: Janosch Frank +Tested-by: Marc Hartmayer +Reviewed-by: Christian Borntraeger +Reviewed-by: Collin Walling +Acked-by: David Hildenbrand +Message-Id: <20201022103135.126033-3-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 3ded270a2697852a71961b45291519ae044f25e3) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/s390x/cpu_features.c | 5 +++++ + target/s390x/cpu_features.h | 4 ++++ + target/s390x/cpu_models.c | 4 ++++ + target/s390x/kvm.c | 3 +-- + 4 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c +index 9f817e3cfa7..e5cdf232607 100644 +--- a/target/s390x/cpu_features.c ++++ b/target/s390x/cpu_features.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "cpu_features.h" ++#include "hw/s390x/pv.h" + + #define DEF_FEAT(_FEAT, _NAME, _TYPE, _BIT, _DESC) \ + [S390_FEAT_##_FEAT] = { \ +@@ -105,6 +106,10 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type, + } + feat = find_next_bit(features, S390_FEAT_MAX, feat + 1); + } ++ ++ if (type == S390_FEAT_TYPE_SCLP_FAC134 && s390_is_pv()) { ++ clear_be_bit(s390_feat_def(S390_FEAT_DIAG_318)->bit, data); ++ } + } + + void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type, +diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h +index f74f7fc3a11..d3c685a04c8 100644 +--- a/target/s390x/cpu_features.h ++++ b/target/s390x/cpu_features.h +@@ -81,6 +81,10 @@ const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup group); + + #define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1)) + ++static inline void clear_be_bit(unsigned int bit_nr, uint8_t *array) ++{ ++ array[bit_nr / 8] &= ~(0x80 >> (bit_nr % 8)); ++} + static inline void set_be_bit(unsigned int bit_nr, uint8_t *array) + { + array[bit_nr / 8] |= 0x80 >> (bit_nr % 8); +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index bf6a3faba9e..d489923cb8a 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -29,6 +29,7 @@ + #include "hw/pci/pci.h" + #endif + #include "qapi/qapi-commands-machine-target.h" ++#include "hw/s390x/pv.h" + + #define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \ + { \ +@@ -238,6 +239,9 @@ bool s390_has_feat(S390Feat feat) + 
} + return 0; + } ++ if (feat == S390_FEAT_DIAG_318 && s390_is_pv()) { ++ return false; ++ } + return test_bit(feat, cpu->model->features); + } + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index e5e190d21c9..6edb52f6d25 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2483,8 +2483,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) + */ + set_bit(S390_FEAT_EXTENDED_LENGTH_SCCB, model->features); + +- /* DIAGNOSE 0x318 is not supported under protected virtualization */ +- if (!s390_is_pv() && kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) { ++ if (kvm_check_extension(kvm_state, KVM_CAP_S390_DIAG318)) { + set_bit(S390_FEAT_DIAG_318, model->features); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pv-Remove-sclp-boundary-checks.patch b/SOURCES/kvm-s390x-pv-Remove-sclp-boundary-checks.patch new file mode 100644 index 0000000..51ceb48 --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Remove-sclp-boundary-checks.patch @@ -0,0 +1,57 @@ +From cf3d958b14e21fde929e67262b6e192592d95359 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:15 -0500 +Subject: [PATCH 15/18] s390x: pv: Remove sclp boundary checks + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-12-thuth@redhat.com> +Patchwork-id: 99508 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 11/12] s390x: pv: Remove sclp boundary checks +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +From: Janosch Frank + +The SCLP boundary cross check is done by the Ultravisor for a +protected guest, hence we don't need to do it. As QEMU doesn't get a +valid SCCB address in protected mode this is even problematic and can +lead to QEMU reporting a false boundary cross error. 
+ +Fixes: db13387ca0 ("s390/sclp: rework sclp boundary checks") +Reported-by: Marc Hartmayer +Signed-off-by: Janosch Frank +Tested-by: Marc Hartmayer +Reviewed-by: Christian Borntraeger +Reviewed-by: Thomas Huth +Reviewed-by: Collin Walling +Acked-by: Halil Pasic +Acked-by: David Hildenbrand +Message-Id: <20201022103135.126033-2-frankja@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 3df4843d0e612a3c838e8d94c3e9c24520f2e680) +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/sclp.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 2931046f456..03f847b2c8a 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -285,11 +285,6 @@ int sclp_service_call_protected(CPUS390XState *env, uint64_t sccb, + goto out_write; + } + +- if (!sccb_verify_boundary(sccb, be16_to_cpu(work_sccb->h.length), code)) { +- work_sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); +- goto out_write; +- } +- + sclp_c->execute(sclp, work_sccb, code); + out_write: + s390_cpu_pv_mem_write(env_archcpu(env), 0, work_sccb, +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch new file mode 100644 index 0000000..f90dc30 --- /dev/null +++ b/SOURCES/kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch @@ -0,0 +1,52 @@ +From fa4e13a01ecc316cc43c1f39490330b94c910bc1 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 14 Dec 2020 18:29:49 -0500 +Subject: [PATCH 04/14] s390x/s390-virtio-ccw: Reset PCI devices during + subsystem reset + +RH-Author: Thomas Huth +Message-id: <20201214182949.35712-2-thuth@redhat.com> +Patchwork-id: 100440 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] s390x/s390-virtio-ccw: Reset PCI devices during subsystem reset +Bugzilla: 1905386 +RH-Acked-by: Danilo de Paula +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +From: Matthew Rosato + 
+Currently, a subsystem reset event leaves PCI devices enabled, causing +issues post-reset in the guest (an example would be after a kexec). These +devices need to be reset during a subsystem reset, allowing them to be +properly re-enabled afterwards. Add the S390 PCI host bridge to the list +of qdevs to be reset during subsystem reset. + +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Acked-by: Halil Pasic +Acked-by: Christian Borntraeger +Cc: qemu-stable@nongnu.org +Message-Id: <1602767767-32713-1-git-send-email-mjrosato@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit db08244a3a7ec312dfed3fd9b88e114281215458) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 5905d2b7adc..5b3d07f55c4 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -103,6 +103,7 @@ static const char *const reset_dev_types[] = { + "s390-sclp-event-facility", + "s390-flic", + "diag288", ++ TYPE_S390_PCI_HOST_BRIDGE, + }; + + static void subsystem_reset(void) +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch b/SOURCES/kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch new file mode 100644 index 0000000..5a38a88 --- /dev/null +++ b/SOURCES/kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch @@ -0,0 +1,90 @@ +From 8b06cba98e37b9c50e2a9deb1567d8cf4e1ba2b6 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 11 Nov 2020 12:03:05 -0500 +Subject: [PATCH 05/18] s390x/sclp.c: remove unneeded label in + sclp_service_call() + +RH-Author: Thomas Huth +Message-id: <20201111120316.707489-2-thuth@redhat.com> +Patchwork-id: 99497 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 01/12] s390x/sclp.c: remove unneeded label in sclp_service_call() +Bugzilla: 1798506 +RH-Acked-by: Jens Freimann +RH-Acked-by: Cornelia Huck 
+RH-Acked-by: David Hildenbrand + +From: Daniel Henrique Barboza + +'out' label can be replaced by 'return' with the appropriate +value. The 'r' integer, which is used solely to set the +return value for this label, can also be removed. + +CC: Cornelia Huck +CC: Halil Pasic +CC: Christian Borntraeger +Signed-off-by: Daniel Henrique Barboza +Reviewed-by: Thomas Huth +Message-Id: <20200106182425.20312-39-danielhb413@gmail.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit e6de76fca48012348d8c81b1399c861f444bd4a4) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/sclp.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c +index 1c380a49cc7..d8ae207731f 100644 +--- a/hw/s390x/sclp.c ++++ b/hw/s390x/sclp.c +@@ -241,24 +241,20 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + { + SCLPDevice *sclp = get_sclp_device(); + SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp); +- int r = 0; + SCCB work_sccb; + + hwaddr sccb_len = sizeof(SCCB); + + /* first some basic checks on program checks */ + if (env->psw.mask & PSW_MASK_PSTATE) { +- r = -PGM_PRIVILEGED; +- goto out; ++ return -PGM_PRIVILEGED; + } + if (cpu_physical_memory_is_io(sccb)) { +- r = -PGM_ADDRESSING; +- goto out; ++ return -PGM_ADDRESSING; + } + if ((sccb & ~0x1fffUL) == 0 || (sccb & ~0x1fffUL) == env->psa + || (sccb & ~0x7ffffff8UL) != 0) { +- r = -PGM_SPECIFICATION; +- goto out; ++ return -PGM_SPECIFICATION; + } + + /* +@@ -270,8 +266,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) + + /* Valid sccb sizes */ + if (be16_to_cpu(work_sccb.h.length) < sizeof(SCCBHeader)) { +- r = -PGM_SPECIFICATION; +- goto out; ++ return -PGM_SPECIFICATION; + } + + if (!sclp_command_code_valid(code)) { +@@ -291,8 +286,7 @@ out_write: + + sclp_c->service_interrupt(sclp, sccb); + +-out: +- return r; ++ return 0; + } + + static void service_interrupt(SCLPDevice *sclp, 
uint32_t sccb) +-- +2.27.0 + diff --git a/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch b/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch new file mode 100644 index 0000000..189be7e --- /dev/null +++ b/SOURCES/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch @@ -0,0 +1,79 @@ +From 08dc2a4dc481916fae9597220ad0faf3f6ed70c1 Mon Sep 17 00:00:00 2001 +From: Eduardo Otubo +Date: Mon, 16 Nov 2020 15:15:38 -0500 +Subject: [PATCH 1/5] seccomp: fix killing of whole process instead of thread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Otubo +Message-id: <20201116151538.22254-1-otubo@redhat.com> +Patchwork-id: 99654 +O-Subject: [RHEL-8.3.0/RHEL-8.4.0 qemu-kvm PATCH] seccomp: fix killing of whole process instead of thread +Bugzilla: 1880546 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula +RH-Acked-by: Marc-André Lureau + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1890885 +BRANCH: rhel-8.3.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=1890885 + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1880546 +BRANCH: rhel-8.4.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=33125023 + +From: Daniel P. Berrangé + +Back in 2018 we introduced support for killing the whole QEMU process +instead of just one thread, when a seccomp rule is violated: + + commit bda08a5764d470f101fa38635d30b41179a313e1 + Author: Marc-André Lureau + Date: Wed Aug 22 19:02:48 2018 +0200 + + seccomp: prefer SCMP_ACT_KILL_PROCESS if available + +Fast forward a year and we introduced a patch to avoid killing the +process for resource control syscalls tickled by Mesa. + + commit 9a1565a03b79d80b236bc7cc2dbce52a2ef3a1b8 + Author: Daniel P. 
Berrangé + Date: Wed Mar 13 09:49:03 2019 +0000 + + seccomp: don't kill process for resource control syscalls + +Unfortunately a logic bug effectively reverted the first commit +mentioned so that we go back to only killing the thread, not the whole +process. + +Signed-off-by: Daniel P. Berrangé +Reviewed-by: Stefan Hajnoczi +Acked-by: Eduardo Otubo +(cherry picked from commit e474e3aacf4276eb0781d11c45e2fab996f9dc56) +Signed-off-by: Eduardo Otubo +Signed-off-by: Danilo C. L. de Paula +--- + qemu-seccomp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/qemu-seccomp.c b/qemu-seccomp.c +index e0a1829b3dd..8325ecb766e 100644 +--- a/qemu-seccomp.c ++++ b/qemu-seccomp.c +@@ -136,8 +136,9 @@ static uint32_t qemu_seccomp_get_action(int set) + + if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { + kill_process = 1; ++ } else { ++ kill_process = 0; + } +- kill_process = 0; + } + if (kill_process == 1) { + return SCMP_ACT_KILL_PROCESS; +-- +2.27.0 + diff --git a/SOURCES/kvm-slirp-check-pkt_len-before-reading-protocol-header.patch b/SOURCES/kvm-slirp-check-pkt_len-before-reading-protocol-header.patch new file mode 100644 index 0000000..43c44ea --- /dev/null +++ b/SOURCES/kvm-slirp-check-pkt_len-before-reading-protocol-header.patch @@ -0,0 +1,72 @@ +From 2bfa25e55c0a49bc079e5769db2199989eda7745 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Fri, 11 Dec 2020 00:59:26 -0500 +Subject: [PATCH 03/14] slirp: check pkt_len before reading protocol header +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20201211005926.618830-2-jmaloy@redhat.com> +Patchwork-id: 100398 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] slirp: check pkt_len before reading protocol header +Bugzilla: 1902237 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau + +While processing ARP/NCSI packets in 'arp_input' or 'ncsi_input' +routines, ensure that pkt_len 
is large enough to accommodate the +respective protocol headers, lest it should do an OOB access. +Add check to avoid it. + +CVE-2020-29129 CVE-2020-29130 + QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets + -> https://www.openwall.com/lists/oss-security/2020/11/27/1 + +Reported-by: Qiuhao Li +Signed-off-by: Prasad J Pandit +Message-Id: <20201126135706.273950-1-ppandit@redhat.com> +Reviewed-by: Marc-André Lureau + +(cherry picked from libslirp commit 2e1dcbc0c2af64fcb17009eaf2ceedd81be2b27f) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ncsi.c | 4 ++++ + slirp/src/slirp.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +index 6864b735db4..251c0d2bfbb 100644 +--- a/slirp/src/ncsi.c ++++ b/slirp/src/ncsi.c +@@ -147,6 +147,10 @@ void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) + uint32_t checksum; + uint32_t *pchecksum; + ++ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { ++ return; /* packet too short */ ++ } ++ + memset(ncsi_reply, 0, sizeof(ncsi_reply)); + + memset(reh->h_dest, 0xff, ETH_ALEN); +diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c +index b0194cb32bb..86b0f52d923 100644 +--- a/slirp/src/slirp.c ++++ b/slirp/src/slirp.c +@@ -700,6 +700,10 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) + return; + } + ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ + ar_op = ntohs(ah->ar_op); + switch (ar_op) { + case ARPOP_REQUEST: +-- +2.27.0 + diff --git a/SOURCES/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch b/SOURCES/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch new file mode 100644 index 0000000..7aaa982 --- /dev/null +++ b/SOURCES/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch @@ -0,0 +1,205 @@ +From dfdf950e893c23e77c9dc0be18fca66ad195d260 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Wed, 10 Feb 2021 15:56:45 +0000 +Subject: [PATCH 2/2] spapr: 
Adjust firmware path of PCI devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20210210165645.470195-2-gkurz@redhat.com> +Patchwork-id: 101038 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] spapr: Adjust firmware path of PCI devices +Bugzilla: 1912891 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Gibson +RH-Acked-by: Laszlo Ersek + +From: Greg Kurz + +It is currently not possible to perform a strict boot from USB storage: + +$ qemu-system-ppc64 -accel kvm -nodefaults -nographic -serial stdio \ + -boot strict=on \ + -device qemu-xhci \ + -device usb-storage,drive=disk,bootindex=0 \ + -blockdev driver=file,node-name=disk,filename=fedora-ppc64le.qcow2 + +SLOF ********************************************************************** +QEMU Starting + Build Date = Jul 17 2020 11:15:24 + FW Version = git-e18ddad8516ff2cf + Press "s" to enter Open Firmware. + +Populating /vdevice methods +Populating /vdevice/vty@71000000 +Populating /vdevice/nvram@71000001 +Populating /pci@800000020000000 + 00 0000 (D) : 1b36 000d serial bus [ usb-xhci ] +No NVRAM common partition, re-initializing... +Scanning USB + XHCI: Initializing + USB Storage + SCSI: Looking for devices + 101000000000000 DISK : "QEMU QEMU HARDDISK 2.5+" +Using default console: /vdevice/vty@71000000 + + Welcome to Open Firmware + + Copyright (c) 2004, 2017 IBM Corporation All rights reserved. + This program and the accompanying materials are made available + under the terms of the BSD License available at + http://www.opensource.org/licenses/bsd-license.php + +Trying to load: from: /pci@800000020000000/usb@0/storage@1/disk@101000000000000 ... +E3405: No such device + +E3407: Load failed + + Type 'boot' and press return to continue booting the system. + Type 'reset-all' and press return to reboot the system. + +Ready! 
+0 > + +The device tree handed over by QEMU to SLOF indeed contains: + +qemu,boot-list = + "/pci@800000020000000/usb@0/storage@1/disk@101000000000000 HALT"; + +but the device node is named usb-xhci@0, not usb@0. + +This happens because the firmware names of PCI devices returned +by get_boot_devices_list() come from pcibus_get_fw_dev_path(), +while the sPAPR PHB code uses a different naming scheme for +device nodes. This inconsistency has always been there but it was +hidden for a long time because SLOF used to rename USB device +nodes, until this commit, merged in QEMU 4.2.0 : + +commit 85164ad4ed9960cac842fa4cc067c6b6699b0994 +Author: Alexey Kardashevskiy +Date: Wed Sep 11 16:24:32 2019 +1000 + + pseries: Update SLOF firmware image + + This fixes USB host bus adapter name in the device tree to match QEMU's + one. + + Signed-off-by: Alexey Kardashevskiy + Signed-off-by: David Gibson + +Fortunately, sPAPR implements the firmware path provider interface. +This provides a way to override the default firmware paths. + +Just factor out the sPAPR PHB naming logic from spapr_dt_pci_device() +to a helper, and use it in the sPAPR firmware path provider hook. + +Fixes: 85164ad4ed99 ("pseries: Update SLOF firmware image") +Signed-off-by: Greg Kurz +Message-Id: <20210122170157.246374-1-groug@kaod.org> +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: David Gibson +(cherry picked from commit 040bdafce12f750816d879442014df2999a995c4) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 5 +++++ + hw/ppc/spapr_pci.c | 33 ++++++++++++++++++--------------- + include/hw/pci-host/spapr.h | 2 ++ + 3 files changed, 25 insertions(+), 15 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 00b1ef075e..bee2299199 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3013,6 +3013,7 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, + SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE); + SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE); + VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON); ++ PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); + + if (d) { + void *spapr = CAST(void, bus->parent, "spapr-vscsi"); +@@ -3086,6 +3087,10 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, + return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn)); + } + ++ if (pcidev) { ++ return spapr_pci_fw_dev_name(pcidev); ++ } ++ + return NULL; + } + +diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c +index f6fbcf99ed..befa570aa8 100644 +--- a/hw/ppc/spapr_pci.c ++++ b/hw/ppc/spapr_pci.c +@@ -1348,15 +1348,29 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, + return offset; + } + ++char *spapr_pci_fw_dev_name(PCIDevice *dev) ++{ ++ const gchar *basename; ++ int slot = PCI_SLOT(dev->devfn); ++ int func = PCI_FUNC(dev->devfn); ++ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); ++ ++ basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, ++ ccode & 0xff); ++ ++ if (func != 0) { ++ return g_strdup_printf("%s@%x,%x", basename, slot, func); ++ } else { ++ return g_strdup_printf("%s@%x", basename, slot); ++ } ++} ++ + /* create OF node for pci device and required OF DT properties */ + static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset) + { + int offset; +- const gchar *basename; +- gchar *nodename; +- int slot = PCI_SLOT(dev->devfn); 
+- int func = PCI_FUNC(dev->devfn); ++ g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + ResourceProps rp; + SpaprDrc *drc = drc_from_dev(sphb, dev); +@@ -1373,19 +1387,8 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + gchar *loc_code; + +- basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, +- ccode & 0xff); +- +- if (func != 0) { +- nodename = g_strdup_printf("%s@%x,%x", basename, slot, func); +- } else { +- nodename = g_strdup_printf("%s@%x", basename, slot); +- } +- + _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename)); + +- g_free(nodename); +- + /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id)); +diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h +index 8877ff51fb..9522db9047 100644 +--- a/include/hw/pci-host/spapr.h ++++ b/include/hw/pci-host/spapr.h +@@ -212,4 +212,6 @@ static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb) + return sphb->ddw_enabled ? 
SPAPR_PCI_DMA_MAX_WINDOWS : 1; + } + ++char *spapr_pci_fw_dev_name(PCIDevice *dev); ++ + #endif /* PCI_HOST_SPAPR_H */ +-- +2.27.0 + diff --git a/SOURCES/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch b/SOURCES/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch new file mode 100644 index 0000000..2968267 --- /dev/null +++ b/SOURCES/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch @@ -0,0 +1,101 @@ +From 1fc9b693c54c93736c6f902f3df8b94440e8cc5d Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:09:53 -0500 +Subject: [PATCH 5/9] spapr: Allow memory unplug to always succeed + +RH-Author: Greg Kurz +Message-id: <20210119150954.1017058-6-gkurz@redhat.com> +Patchwork-id: 100686 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 5/6] spapr: Allow memory unplug to always succeed +Bugzilla: 1901837 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +It is currently impossible to hot-unplug a memory device between +machine reset and CAS. + +(qemu) device_del dimm1 +Error: Memory hot unplug not supported for this guest + +This limitation was introduced in order to provide an explicit +error path for older guests that didn't support hot-plug event +sources (and thus memory hot-unplug). + +The linux kernel has been supporting these since 4.11. All recent +enough guests are thus capable of handling the removal of a memory +device at all times, including during early boot. + +Lift the limitation for the latest machine type. This means that +trying to unplug memory from a guest that doesn't support it will +likely just do nothing and the memory will only get removed at +next reboot. Such older guests can still get the existing behavior +by using an older machine type. 
+ +Signed-off-by: Greg Kurz +Message-Id: <160794035064.23292.17560963281911312439.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 1e8b5b1aa16b7d73ba8ba52c95d0b52329d5c9d0) +Signed-off-by: Greg Kurz + +Conflicts: + hw/ppc/spapr.c + include/hw/ppc/spapr.h + +Conflicts around the addition of pre_6_0_memory_unplug. Ignore the +change that sets pre_6_0_memory_unplug for older machine types. +This is ok because pre_6_0_memory_unplug is removed in a subsequent +patch anyway. + +Signed-off-by: Jon Maloy +--- + hw/ppc/spapr.c | 3 ++- + hw/ppc/spapr_events.c | 3 ++- + include/hw/ppc/spapr.h | 1 + + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 992bd08aaa..f8de33e3e5 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4001,7 +4001,8 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { +- if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { ++ if (!smc->pre_6_0_memory_unplug || ++ spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); + } else { + /* NOTE: this means there is a window after guest reset, prior to +diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c +index 15b92b63ad..6e284aa4bc 100644 +--- a/hw/ppc/spapr_events.c ++++ b/hw/ppc/spapr_events.c +@@ -547,7 +547,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ +- g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); ++ g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || ++ spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h 
+index e5e2a99046..ac6961ed16 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -124,6 +124,7 @@ struct SpaprMachineClass { + bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ + bool linux_pci_probe; + bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ ++ bool pre_6_0_memory_unplug; + + bool has_power9_support; + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, +-- +2.18.2 + diff --git a/SOURCES/kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch b/SOURCES/kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch new file mode 100644 index 0000000..1462d52 --- /dev/null +++ b/SOURCES/kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch @@ -0,0 +1,145 @@ +From ad7aaf34400b1bbd41bbec182fd5895eaad50932 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:09:51 -0500 +Subject: [PATCH 3/9] spapr: Don't use spapr_drc_needed() in CAS code + +RH-Author: Greg Kurz +Message-id: <20210119150954.1017058-4-gkurz@redhat.com> +Patchwork-id: 100683 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 3/6] spapr: Don't use spapr_drc_needed() in CAS code +Bugzilla: 1901837 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +We currently don't support hotplug of devices between boot and CAS. If +this happens a CAS reboot is triggered. We detect this during CAS using +the spapr_drc_needed() function which is essentially a VMStateDescription +.needed callback. Even if the condition for CAS reboot happens to be the +same as for DRC migration, it looks wrong to piggyback a migration helper +for this. + +Introduce a helper with a slightly more explicit name and use it in both CAS +and DRC migration code. Since a subsequent patch will enhance this helper +to cover the case of hot unplug, let's go for spapr_drc_transient(). While +here convert spapr_hotplugged_dev_before_cas() to the "transient" wording as +well. + +This doesn't change any behaviour. 
+ +Signed-off-by: Greg Kurz +Message-Id: <158169248180.3465937.9531405453362718771.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 4b63db1289a9e597bc151fa5e4d72f882cb6de1e) +Signed-off-by: Greg Kurz +Signed-off-by: Jon Maloy +--- + hw/ppc/spapr_drc.c | 20 ++++++++++++++------ + hw/ppc/spapr_hcall.c | 14 +++++++++----- + include/hw/ppc/spapr_drc.h | 4 +++- + 3 files changed, 26 insertions(+), 12 deletions(-) + +diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c +index 62f1a42592..9b498d429e 100644 +--- a/hw/ppc/spapr_drc.c ++++ b/hw/ppc/spapr_drc.c +@@ -455,23 +455,31 @@ void spapr_drc_reset(SpaprDrc *drc) + } + } + +-bool spapr_drc_needed(void *opaque) ++bool spapr_drc_transient(SpaprDrc *drc) + { +- SpaprDrc *drc = (SpaprDrc *)opaque; + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + +- /* If no dev is plugged in there is no need to migrate the DRC state */ ++ /* ++ * If no dev is plugged in there is no need to migrate the DRC state ++ * nor to reset the DRC at CAS. ++ */ + if (!drc->dev) { + return false; + } + + /* +- * We need to migrate the state if it's not equal to the expected +- * long-term state, which is the same as the coldplugged initial +- * state */ ++ * We need to reset the DRC at CAS or to migrate the DRC state if it's ++ * not equal to the expected long-term state, which is the same as the ++ * coldplugged initial state. 
++ */ + return (drc->state != drck->ready_state); + } + ++static bool spapr_drc_needed(void *opaque) ++{ ++ return spapr_drc_transient(opaque); ++} ++ + static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index 0f19be794c..d70e643752 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1640,20 +1640,24 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu, + return best_compat; + } + +-static bool spapr_hotplugged_dev_before_cas(void) ++static bool spapr_transient_dev_before_cas(void) + { +- Object *drc_container, *obj; ++ Object *drc_container; + ObjectProperty *prop; + ObjectPropertyIterator iter; + + drc_container = container_get(object_get_root(), "/dr-connector"); + object_property_iter_init(&iter, drc_container); + while ((prop = object_property_iter_next(&iter))) { ++ SpaprDrc *drc; ++ + if (!strstart(prop->type, "link<", NULL)) { + continue; + } +- obj = object_property_get_link(drc_container, prop->name, NULL); +- if (spapr_drc_needed(obj)) { ++ drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container, ++ prop->name, NULL)); ++ ++ if (spapr_drc_transient(drc)) { + return true; + } + } +@@ -1812,7 +1816,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + + spapr_irq_update_active_intc(spapr); + +- if (spapr_hotplugged_dev_before_cas()) { ++ if (spapr_transient_dev_before_cas()) { + spapr->cas_reboot = true; + } + +diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h +index 83f03cc577..7e09d57114 100644 +--- a/include/hw/ppc/spapr_drc.h ++++ b/include/hw/ppc/spapr_drc.h +@@ -269,7 +269,9 @@ int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask); + + void spapr_drc_attach(SpaprDrc *drc, DeviceState *d, Error **errp); + void spapr_drc_detach(SpaprDrc *drc); +-bool spapr_drc_needed(void *opaque); ++ ++/* Returns true if a hot plug/unplug request is pending 
*/ ++bool spapr_drc_transient(SpaprDrc *drc); + + static inline bool spapr_drc_unplug_requested(SpaprDrc *drc) + { +-- +2.18.2 + diff --git a/SOURCES/kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch b/SOURCES/kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch new file mode 100644 index 0000000..c14aa7d --- /dev/null +++ b/SOURCES/kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch @@ -0,0 +1,105 @@ +From 9ebed8090b88282f9b7432258df9182b9d3944ee Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:09:52 -0500 +Subject: [PATCH 4/9] spapr: Fix handling of unplugged devices during CAS and + migration + +RH-Author: Greg Kurz +Message-id: <20210119150954.1017058-5-gkurz@redhat.com> +Patchwork-id: 100685 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 4/6] spapr: Fix handling of unplugged devices during CAS and migration +Bugzilla: 1901837 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +We already detect if a device is being hot plugged before CAS to trigger +a CAS reboot and during migration to migrate the state of the associated +DRC. But hot unplugging a device is also an asynchronous operation that +requires the guest to take action. This means that if the guest is migrated +after the hot unplug event was sent but before it could release the device +with RTAS, the destination QEMU doesn't know about the pending unplug +operation and doesn't actually remove the device when the guest finally +releases it. + +Similarly, if the unplug request is fired before CAS, the guest isn't +notified of the change, just like with hotplug. It ends up booting with +the device still present in the DT and configures it, just like it was +never removed. Even weirder, since the event is still queued, it will +be eventually processed when some other unrelated event is posted to +the guest. + +Enhance spapr_drc_transient() to also return true if an unplug request is +pending. 
This fixes the issue at CAS with a CAS reboot request and +causes the DRC state to be migrated. Some extra care is still needed to +inform the destination that an unplug request is pending : migrate the +unplug_requested field of the DRC in an optional subsection. This might +break backwards migration, but this is still better than ending with +an inconsistent guest. + +Signed-off-by: Greg Kurz +Message-Id: <158169248798.3465937.1108351365840514270.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit ab8584349c476f9818dc6403359c85f9ab0ad5eb) +Signed-off-by: Greg Kurz +Signed-off-by: Jon Maloy +--- + hw/ppc/spapr_drc.c | 25 +++++++++++++++++++++++-- + 1 file changed, 23 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c +index 9b498d429e..897bb7aae0 100644 +--- a/hw/ppc/spapr_drc.c ++++ b/hw/ppc/spapr_drc.c +@@ -455,6 +455,22 @@ void spapr_drc_reset(SpaprDrc *drc) + } + } + ++static bool spapr_drc_unplug_requested_needed(void *opaque) ++{ ++ return spapr_drc_unplug_requested(opaque); ++} ++ ++static const VMStateDescription vmstate_spapr_drc_unplug_requested = { ++ .name = "spapr_drc/unplug_requested", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = spapr_drc_unplug_requested_needed, ++ .fields = (VMStateField []) { ++ VMSTATE_BOOL(unplug_requested, SpaprDrc), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + bool spapr_drc_transient(SpaprDrc *drc) + { + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); +@@ -470,9 +486,10 @@ bool spapr_drc_transient(SpaprDrc *drc) + /* + * We need to reset the DRC at CAS or to migrate the DRC state if it's + * not equal to the expected long-term state, which is the same as the +- * coldplugged initial state. ++ * coldplugged initial state, or if an unplug request is pending. 
+ */ +- return (drc->state != drck->ready_state); ++ return drc->state != drck->ready_state || ++ spapr_drc_unplug_requested(drc); + } + + static bool spapr_drc_needed(void *opaque) +@@ -488,6 +505,10 @@ static const VMStateDescription vmstate_spapr_drc = { + .fields = (VMStateField []) { + VMSTATE_UINT32(state, SpaprDrc), + VMSTATE_END_OF_LIST() ++ }, ++ .subsections = (const VMStateDescription * []) { ++ &vmstate_spapr_drc_unplug_requested, ++ NULL + } + }; + +-- +2.18.2 + diff --git a/SOURCES/kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch b/SOURCES/kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch new file mode 100644 index 0000000..b0ca288 --- /dev/null +++ b/SOURCES/kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch @@ -0,0 +1,246 @@ +From cb9d5380b1376b2a44d91d84eaf09f948ef1e165 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:09:50 -0500 +Subject: [PATCH 2/9] spapr: Fold h_cas_compose_response() into + h_client_architecture_support() + +RH-Author: Greg Kurz +Message-id: <20210119150954.1017058-3-gkurz@redhat.com> +Patchwork-id: 100687 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 2/6] spapr: Fold h_cas_compose_response() into h_client_architecture_support() +Bugzilla: 1901837 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: David Gibson + +spapr_h_cas_compose_response() handles the last piece of the PAPR feature +negotiation process invoked via the ibm,client-architecture-support OF +call. Its only caller is h_client_architecture_support() which handles +most of the rest of that process. + +I believe it was placed in a separate file originally to handle some +fiddly dependencies between functions, but mostly it's just confusing +to have the CAS process split into two pieces like this. Now that +compose response is simplified (by just generating the whole device +tree anew), it's cleaner to just fold it into +h_client_architecture_support(). 
+ +Signed-off-by: David Gibson +Reviewed-by: Cedric Le Goater +Reviewed-by: Greg Kurz +(cherry picked from commit 0c21e073541cc093b4cb8744640e24f130e6f8ba) +Signed-off-by: Greg Kurz +Signed-off-by: Jon Maloy +--- + hw/ppc/spapr.c | 61 +----------------------------------------- + hw/ppc/spapr_hcall.c | 55 ++++++++++++++++++++++++++++++++++--- + include/hw/ppc/spapr.h | 4 +-- + 3 files changed, 54 insertions(+), 66 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 92f63ad035..992bd08aaa 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -76,7 +76,6 @@ + #include "hw/nmi.h" + #include "hw/intc/intc.h" + +-#include "qemu/cutils.h" + #include "hw/ppc/spapr_cpu_core.h" + #include "hw/mem/memory-device.h" + #include "hw/ppc/spapr_tpm_proxy.h" +@@ -898,63 +897,6 @@ out: + return ret; + } + +-static bool spapr_hotplugged_dev_before_cas(void) +-{ +- Object *drc_container, *obj; +- ObjectProperty *prop; +- ObjectPropertyIterator iter; +- +- drc_container = container_get(object_get_root(), "/dr-connector"); +- object_property_iter_init(&iter, drc_container); +- while ((prop = object_property_iter_next(&iter))) { +- if (!strstart(prop->type, "link<", NULL)) { +- continue; +- } +- obj = object_property_get_link(drc_container, prop->name, NULL); +- if (spapr_drc_needed(obj)) { +- return true; +- } +- } +- return false; +-} +- +-static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, +- size_t space); +- +-int spapr_h_cas_compose_response(SpaprMachineState *spapr, +- target_ulong addr, target_ulong size, +- SpaprOptionVector *ov5_updates) +-{ +- void *fdt; +- SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 }; +- +- if (spapr_hotplugged_dev_before_cas()) { +- return 1; +- } +- +- if (size < sizeof(hdr)) { +- error_report("SLOF provided insufficient CAS buffer " +- TARGET_FMT_lu " (min: %zu)", size, sizeof(hdr)); +- exit(EXIT_FAILURE); +- } +- +- size -= sizeof(hdr); +- +- fdt = spapr_build_fdt(spapr, false, size); +- _FDT((fdt_pack(fdt))); +- +- 
cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); +- cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); +- trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); +- +- g_free(spapr->fdt_blob); +- spapr->fdt_size = fdt_totalsize(fdt); +- spapr->fdt_initial_size = spapr->fdt_size; +- spapr->fdt_blob = fdt; +- +- return 0; +-} +- + static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) + { + MachineState *ms = MACHINE(spapr); +@@ -1192,8 +1134,7 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) + } + } + +-static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, +- size_t space) ++void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) + { + MachineState *machine = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(machine); +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index 05a7ca275b..0f19be794c 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1,4 +1,5 @@ + #include "qemu/osdep.h" ++#include "qemu/cutils.h" + #include "qapi/error.h" + #include "sysemu/hw_accel.h" + #include "sysemu/runstate.h" +@@ -15,6 +16,7 @@ + #include "cpu-models.h" + #include "trace.h" + #include "kvm_ppc.h" ++#include "hw/ppc/fdt.h" + #include "hw/ppc/spapr_ovec.h" + #include "mmu-book3s-v3.h" + #include "hw/mem/memory-device.h" +@@ -1638,6 +1640,26 @@ static uint32_t cas_check_pvr(SpaprMachineState *spapr, PowerPCCPU *cpu, + return best_compat; + } + ++static bool spapr_hotplugged_dev_before_cas(void) ++{ ++ Object *drc_container, *obj; ++ ObjectProperty *prop; ++ ObjectPropertyIterator iter; ++ ++ drc_container = container_get(object_get_root(), "/dr-connector"); ++ object_property_iter_init(&iter, drc_container); ++ while ((prop = object_property_iter_next(&iter))) { ++ if (!strstart(prop->type, "link<", NULL)) { ++ continue; ++ } ++ obj = object_property_get_link(drc_container, prop->name, NULL); ++ if (spapr_drc_needed(obj)) { ++ return true; ++ } ++ } ++ return 
false; ++} ++ + static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, +@@ -1645,6 +1667,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + { + /* Working address in data buffer */ + target_ulong addr = ppc64_phys_to_real(args[0]); ++ target_ulong fdt_buf = args[1]; ++ target_ulong fdt_bufsize = args[2]; + target_ulong ov_table; + uint32_t cas_pvr; + SpaprOptionVector *ov1_guest, *ov5_guest, *ov5_cas_old, *ov5_updates; +@@ -1788,16 +1812,41 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + + spapr_irq_update_active_intc(spapr); + ++ if (spapr_hotplugged_dev_before_cas()) { ++ spapr->cas_reboot = true; ++ } ++ + if (!spapr->cas_reboot) { ++ void *fdt; ++ SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 }; ++ + /* If spapr_machine_reset() did not set up a HPT but one is necessary + * (because the guest isn't going to use radix) then set it up here. */ + if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { + /* legacy hash or new hash: */ + spapr_setup_hpt_and_vrma(spapr); + } +- spapr->cas_reboot = +- (spapr_h_cas_compose_response(spapr, args[1], args[2], +- ov5_updates) != 0); ++ ++ if (fdt_bufsize < sizeof(hdr)) { ++ error_report("SLOF provided insufficient CAS buffer " ++ TARGET_FMT_lu " (min: %zu)", fdt_bufsize, sizeof(hdr)); ++ exit(EXIT_FAILURE); ++ } ++ ++ fdt_bufsize -= sizeof(hdr); ++ ++ fdt = spapr_build_fdt(spapr, false, fdt_bufsize); ++ _FDT((fdt_pack(fdt))); ++ ++ cpu_physical_memory_write(fdt_buf, &hdr, sizeof(hdr)); ++ cpu_physical_memory_write(fdt_buf + sizeof(hdr), fdt, ++ fdt_totalsize(fdt)); ++ trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); ++ ++ g_free(spapr->fdt_blob); ++ spapr->fdt_size = fdt_totalsize(fdt); ++ spapr->fdt_initial_size = spapr->fdt_size; ++ spapr->fdt_blob = fdt; + } + + spapr_ovec_cleanup(ov5_updates); +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index e047dabf30..e5e2a99046 100644 +--- 
a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -767,11 +767,9 @@ struct SpaprEventLogEntry { + QTAILQ_ENTRY(SpaprEventLogEntry) next; + }; + ++void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space); + void spapr_events_init(SpaprMachineState *sm); + void spapr_dt_events(SpaprMachineState *sm, void *fdt); +-int spapr_h_cas_compose_response(SpaprMachineState *sm, +- target_ulong addr, target_ulong size, +- SpaprOptionVector *ov5_updates); + void close_htab_fd(SpaprMachineState *spapr); + void spapr_setup_hpt_and_vrma(SpaprMachineState *spapr); + void spapr_free_hpt(SpaprMachineState *spapr); +-- +2.18.2 + diff --git a/SOURCES/kvm-spapr-Improve-handling-of-fdt-buffer-size.patch b/SOURCES/kvm-spapr-Improve-handling-of-fdt-buffer-size.patch new file mode 100644 index 0000000..2f57cde --- /dev/null +++ b/SOURCES/kvm-spapr-Improve-handling-of-fdt-buffer-size.patch @@ -0,0 +1,125 @@ +From 04f7fe2423a4de8d2fea7068b3fb316e15e76eaa Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:09:49 -0500 +Subject: [PATCH 1/9] spapr: Improve handling of fdt buffer size + +RH-Author: Greg Kurz +Message-id: <20210119150954.1017058-2-gkurz@redhat.com> +Patchwork-id: 100682 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 1/6] spapr: Improve handling of fdt buffer size +Bugzilla: 1901837 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: David Gibson + +Previously, spapr_build_fdt() constructed the device tree in a fixed +buffer of size FDT_MAX_SIZE. This is a bit inflexible, but more +importantly it's awkward for the case where we use it during CAS. In +that case the guest firmware supplies a buffer and we have to +awkwardly check that what we generated fits into it afterwards, after +doing a lot of size checks during spapr_build_fdt(). + +Simplify this by having spapr_build_fdt() take a 'space' parameter. 
+For the CAS case, we pass in the buffer size provided by SLOF, for the +machine init case, we continue to pass FDT_MAX_SIZE. + +Signed-off-by: David Gibson +Reviewed-by: Cedric Le Goater +Reviewed-by: Greg Kurz +(cherry picked from commit 97b32a6afa78ae68fb16344b9a144b6f433f42a2) +Signed-off-by: Greg Kurz +Signed-off-by: Jon Maloy +--- + hw/ppc/spapr.c | 33 +++++++++++---------------------- + 1 file changed, 11 insertions(+), 22 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index c74079702d..92f63ad035 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -918,7 +918,8 @@ static bool spapr_hotplugged_dev_before_cas(void) + return false; + } + +-static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset); ++static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, ++ size_t space); + + int spapr_h_cas_compose_response(SpaprMachineState *spapr, + target_ulong addr, target_ulong size, +@@ -931,24 +932,17 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, + return 1; + } + +- if (size < sizeof(hdr) || size > FW_MAX_SIZE) { +- error_report("SLOF provided an unexpected CAS buffer size " +- TARGET_FMT_lu " (min: %zu, max: %u)", +- size, sizeof(hdr), FW_MAX_SIZE); ++ if (size < sizeof(hdr)) { ++ error_report("SLOF provided insufficient CAS buffer " ++ TARGET_FMT_lu " (min: %zu)", size, sizeof(hdr)); + exit(EXIT_FAILURE); + } + + size -= sizeof(hdr); + +- fdt = spapr_build_fdt(spapr, false); ++ fdt = spapr_build_fdt(spapr, false, size); + _FDT((fdt_pack(fdt))); + +- if (fdt_totalsize(fdt) + sizeof(hdr) > size) { +- g_free(fdt); +- trace_spapr_cas_failed(size); +- return -1; +- } +- + cpu_physical_memory_write(addr, &hdr, sizeof(hdr)); + cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt)); + trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr)); +@@ -1198,7 +1192,8 @@ static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) + } + } + +-static void *spapr_build_fdt(SpaprMachineState *spapr, bool 
reset) ++static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, ++ size_t space) + { + MachineState *machine = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(machine); +@@ -1208,8 +1203,8 @@ static void *spapr_build_fdt(SpaprMachineState *spapr, bool reset) + SpaprPhbState *phb; + char *buf; + +- fdt = g_malloc0(FDT_MAX_SIZE); +- _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); ++ fdt = g_malloc0(space); ++ _FDT((fdt_create_empty_tree(fdt, space))); + + /* Root node */ + _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp")); +@@ -1724,19 +1719,13 @@ static void spapr_machine_reset(MachineState *machine) + */ + fdt_addr = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FDT_MAX_SIZE; + +- fdt = spapr_build_fdt(spapr, true); ++ fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); + + rc = fdt_pack(fdt); + + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); + +- if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { +- error_report("FDT too big ! 0x%x bytes (max is 0x%x)", +- fdt_totalsize(fdt), FDT_MAX_SIZE); +- exit(1); +- } +- + /* Load the fdt */ + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); +-- +2.18.2 + diff --git a/SOURCES/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch b/SOURCES/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch new file mode 100644 index 0000000..b4b2b5f --- /dev/null +++ b/SOURCES/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch @@ -0,0 +1,170 @@ +From f94b3a4eb9d709f1f6a14ad9ad6ebcc1b67b6923 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:09:54 -0500 +Subject: [PATCH 6/9] spapr: Improve handling of memory unplug with old guests + +RH-Author: Greg Kurz +Message-id: <20210119150954.1017058-7-gkurz@redhat.com> +Patchwork-id: 100684 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH v2 6/6] spapr: Improve handling of memory unplug with old guests +Bugzilla: 1901837 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: 
Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") +trying to unplug memory from a guest that doesn't support it (e.g. rhel6) +no longer generates an error like it used to. Instead, it leaves the +memory around : only a subsequent reboot or manual use of drmgr within +the guest can complete the hot-unplug sequence. A flag was added to +SpaprMachineClass so that this new behavior only applies to the default +machine type. + +We can do better. CAS processes all pending hot-unplug requests. This +means that we don't really care about what the guest supports if +the hot-unplug request happens before CAS. + +All guests that we care for, even old ones, set enough bits in OV5 +that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a +heuristic to decide if CAS has already occurred or not. + +Always accept unplug requests that happen before CAS since CAS will +process them. Restore the previous behavior of rejecting them after +CAS when we know that the guest doesn't support memory hot-unplug. + +This behavior is suitable for all machine types : this allows us to +drop the pre_6_0_memory_unplug flag. + +Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") +Signed-off-by: Greg Kurz +Message-Id: <161012708715.801107.11418801796987916516.stgit@bahia.lan> +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: David Gibson +(cherry picked from commit 73598c75df0585e039825e642adede21912dabc7) +Signed-off-by: Greg Kurz + +Conflicts: + hw/ppc/spapr.c + include/hw/ppc/spapr.h + +Contextual conflicts around the removal of pre_6_0_memory_unplug, +which was only partially backported from upstream 1e8b5b1aa16b, and +the addition of spapr_memory_hot_unplug_supported().
+ +Signed-off-by: Jon Maloy +--- + hw/ppc/spapr.c | 21 +++++++++++++-------- + hw/ppc/spapr_events.c | 3 +-- + hw/ppc/spapr_ovec.c | 7 +++++++ + include/hw/ppc/spapr.h | 2 +- + include/hw/ppc/spapr_ovec.h | 1 + + 5 files changed, 23 insertions(+), 11 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index f8de33e3e5..00b1ef075e 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3993,6 +3993,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, + } + } + ++bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr) ++{ ++ return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) || ++ /* ++ * CAS will process all pending unplug requests. ++ * ++ * HACK: a guest could theoretically have cleared all bits in OV5, ++ * but none of the guests we care for do. ++ */ ++ spapr_ovec_empty(spapr->ov5_cas); ++} ++ + static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -4001,16 +4013,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { +- if (!smc->pre_6_0_memory_unplug || +- spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { ++ if (spapr_memory_hot_unplug_supported(sms)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); + } else { +- /* NOTE: this means there is a window after guest reset, prior to +- * CAS negotiation, where unplug requests will fail due to the +- * capability not being detected yet. 
This is a bit different than +- * the case with PCI unplug, where the events will be queued and +- * eventually handled by the guest after boot +- */ + error_setg(errp, "Memory hot unplug not supported for this guest"); + } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { +diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c +index 6e284aa4bc..08168acd65 100644 +--- a/hw/ppc/spapr_events.c ++++ b/hw/ppc/spapr_events.c +@@ -547,8 +547,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ +- g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || +- spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); ++ g_assert(spapr_memory_hot_unplug_supported(spapr)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = +diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c +index 811fadf143..f858afc7d5 100644 +--- a/hw/ppc/spapr_ovec.c ++++ b/hw/ppc/spapr_ovec.c +@@ -135,6 +135,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr) + return test_bit(bitnr, ov->bitmap) ? 
true : false; + } + ++bool spapr_ovec_empty(SpaprOptionVector *ov) ++{ ++ g_assert(ov); ++ ++ return bitmap_empty(ov->bitmap, OV_MAXBITS); ++} ++ + static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, + long bitmap_offset) + { +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index ac6961ed16..7aaf5d9996 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -124,7 +124,6 @@ struct SpaprMachineClass { + bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ + bool linux_pci_probe; + bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ +- bool pre_6_0_memory_unplug; + + bool has_power9_support; + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, +@@ -894,4 +893,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + #define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */ + + void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); ++bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); + #endif /* HW_SPAPR_H */ +diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h +index 7891e9caac..98c73bf601 100644 +--- a/include/hw/ppc/spapr_ovec.h ++++ b/include/hw/ppc/spapr_ovec.h +@@ -73,6 +73,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov); + void spapr_ovec_set(SpaprOptionVector *ov, long bitnr); + void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr); + bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr); ++bool spapr_ovec_empty(SpaprOptionVector *ov); + SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector); + int spapr_ovec_populate_dt(void *fdt, int fdt_offset, + SpaprOptionVector *ov, const char *name); +-- +2.18.2 + diff --git a/SOURCES/kvm-target-i386-add-fast-short-REP-MOV-support.patch b/SOURCES/kvm-target-i386-add-fast-short-REP-MOV-support.patch new file mode 100644 index 0000000..51af7e7 --- /dev/null +++ b/SOURCES/kvm-target-i386-add-fast-short-REP-MOV-support.patch @@ -0,0 
+1,59 @@ +From f33880c5f7a4e2cad25c22112da073273c6e2cfb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 24 Feb 2021 11:30:35 -0500 +Subject: [PATCH 2/4] target/i386: add fast short REP MOV support + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210224113037.15599-3-dgilbert@redhat.com> +Patchwork-id: 101201 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/4] target/i386: add fast short REP MOV support +Bugzilla: 1790620 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Peter Xu + +From: Chenyi Qiang + +For CPUs that support fast short REP MOV[CPUID.(EAX=7,ECX=0):EDX(bit4)], e.g. +Icelake and Tigerlake, expose it to the guest VM. + +Reviewed-by: Eduardo Habkost +Signed-off-by: Chenyi Qiang +Message-Id: <20200714084148.26690-2-chenyi.qiang@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 5cb287d2bd578dfe4897458793b4fce35bc4f744) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 67dab94aa5..f6a9ed84b3 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1077,7 +1077,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", +- NULL, NULL, NULL, NULL, ++ "fsrm", NULL, NULL, NULL, + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL /* pconfig */, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 8e2e52ed31..f5a4efcec6 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -770,6 +770,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) + /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) ++/* Fast Short Rep Mov */ ++#define CPUID_7_0_EDX_FSRM (1U << 4) + /* AVX512 Vector Pair
Intersection to a Pair of Mask Registers */ + #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* Speculation Control */ +-- +2.27.0 + diff --git a/SOURCES/kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch b/SOURCES/kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch new file mode 100644 index 0000000..059445b --- /dev/null +++ b/SOURCES/kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch @@ -0,0 +1,116 @@ +From ba3068eb1a349ec4ed8b7ccdae76450f0c315be9 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 19 Nov 2020 17:23:11 -0500 +Subject: [PATCH 18/18] trace: use STAP_SDT_V2 to work around symbol visibility +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20201119172311.942629-2-stefanha@redhat.com> +Patchwork-id: 99779 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/1] trace: use STAP_SDT_V2 to work around symbol visibility +Bugzilla: 1898700 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Philippe Mathieu-Daudé + +QEMU binaries no longer launch successfully with recent SystemTap +releases. This is because modular QEMU builds link the sdt semaphores +into the main binary instead of into the shared objects where they are +used. The symbol visibility of semaphores is 'hidden' and the dynamic +linker prints an error during module loading: + + $ ./configure --enable-trace-backends=dtrace --enable-modules ... + ... + Failed to open module: /builddir/build/BUILD/qemu-4.2.0/s390x-softmmu/../block-curl.so: undefined symbol: qemu_curl_close_semaphore + +The long-term solution is to generate per-module dtrace .o files and +link them into the module instead of the main binary. + +In the short term we can define STAP_SDT_V2 so dtrace(1) produces a .o +file with 'default' symbol visibility instead of 'hidden'. This +workaround is small and easier to merge for QEMU 5.2 and downstream +backports. 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1898700 +Cc: wcohen@redhat.com +Cc: fche@redhat.com +Cc: kraxel@redhat.com +Cc: rjones@redhat.com +Cc: ddepaula@redhat.com +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Miroslav Rezanina + +(cherry picked from commit 4b265c79a85bb35abe19aacea6954c1616521639) +Signed-off-by: Stefan Hajnoczi + +Conflicts: + trace/meson.build + Downstream uses makefiles, so move the dtrace invocation changes to + rules.mak and Makefile. +Signed-off-by: Danilo C. L. de Paula +--- + Makefile | 4 ++-- + configure | 7 +++++++ + rules.mak | 2 +- + 3 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/Makefile b/Makefile +index ff05c309497..29b01a13ee3 100644 +--- a/Makefile ++++ b/Makefile +@@ -198,7 +198,7 @@ tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py") + $< > $@,"GEN","$(@:%-timestamp=%)") + + %/trace-dtrace.h: %/trace-dtrace.dtrace $(tracetool-y) +- $(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@") ++ $(call quiet-command,dtrace -o $@ -DSTAP_SDT_V2 -h -s $<, "GEN","$@") + + %/trace-dtrace.o: %/trace-dtrace.dtrace $(tracetool-y) + +@@ -258,7 +258,7 @@ trace-dtrace-root.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config + $< > $@,"GEN","$(@:%-timestamp=%)") + + trace-dtrace-root.h: trace-dtrace-root.dtrace +- $(call quiet-command,dtrace -o $@ -h -s $<, "GEN","$@") ++ $(call quiet-command,dtrace -o $@ -DSTAP_SDT_V2 -h -s $<, "GEN","$@") + + trace-dtrace-root.o: trace-dtrace-root.dtrace + +diff --git a/configure b/configure +index 5120c1409a7..c62b61403f6 100755 +--- a/configure ++++ b/configure +@@ -5275,6 +5275,13 @@ if have_backend "dtrace"; then + trace_backend_stap="no" + if has 'stap' ; then + trace_backend_stap="yes" ++ ++ # Workaround to avoid dtrace(1) producing a file with 'hidden' symbol ++ # visibility. Define STAP_SDT_V2 to produce 'default' symbol visibility ++ # instead. 
QEMU --enable-modules depends on this because the SystemTap ++ # semaphores are linked into the main binary and not the module's shared ++ # object. ++ QEMU_CFLAGS="$QEMU_CFLAGS -DSTAP_SDT_V2" + fi + fi + +diff --git a/rules.mak b/rules.mak +index 967295dd2b6..bdfc223a5a1 100644 +--- a/rules.mak ++++ b/rules.mak +@@ -101,7 +101,7 @@ LINK = $(call quiet-command, $(LINKPROG) $(QEMU_LDFLAGS) $(QEMU_CFLAGS) $(CFLAGS + -c -o $@ $<,"OBJC","$(TARGET_DIR)$@") + + %.o: %.dtrace +- $(call quiet-command,dtrace -o $@ -G -s $<,"GEN","$(TARGET_DIR)$@") ++ $(call quiet-command,dtrace -o $@ -DSTAP_SDT_V2 -G -s $<,"GEN","$(TARGET_DIR)$@") + + DSO_OBJ_CFLAGS := -fPIC -DBUILD_DSO + module-common.o: CFLAGS += $(DSO_OBJ_CFLAGS) +-- +2.27.0 + diff --git a/SOURCES/kvm-util-Introduce-qemu_get_host_name.patch b/SOURCES/kvm-util-Introduce-qemu_get_host_name.patch new file mode 100644 index 0000000..da21888 --- /dev/null +++ b/SOURCES/kvm-util-Introduce-qemu_get_host_name.patch @@ -0,0 +1,123 @@ +From 41510fba34cda98cb85a8d04e46dcfdd9a91aa61 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 24 Dec 2020 12:53:03 -0500 +Subject: [PATCH 3/5] util: Introduce qemu_get_host_name() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20201224125304.62697-3-marcandre.lureau@redhat.com> +Patchwork-id: 100499 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/3] util: Introduce qemu_get_host_name() +Bugzilla: 1910326 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Philippe Mathieu-Daudé + +From: Michal Privoznik + +This function offers an operating-system-agnostic way to fetch the host +name. It is implemented for both POSIX-like and Windows systems. + +Signed-off-by: Michal Privoznik +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Daniel P.
Berrangé +Cc: qemu-stable@nongnu.org +Signed-off-by: Michael Roth + +(cherry picked from commit e47f4765afcab2b78dfa5b0115abf64d1d49a5d3) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + include/qemu/osdep.h | 10 ++++++++++ + util/oslib-posix.c | 35 +++++++++++++++++++++++++++++++++++ + util/oslib-win32.c | 13 +++++++++++++ + 3 files changed, 58 insertions(+) + +diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h +index 0f97d68586a..d427e81a427 100644 +--- a/include/qemu/osdep.h ++++ b/include/qemu/osdep.h +@@ -620,4 +620,14 @@ static inline void qemu_reset_optind(void) + #endif + } + ++/** ++ * qemu_get_host_name: ++ * @errp: Error object ++ * ++ * Operating system agnostic way of querying host name. ++ * ++ * Returns allocated hostname (caller should free), NULL on failure. ++ */ ++char *qemu_get_host_name(Error **errp); ++ + #endif +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index 5a291cc9820..8f88e4dbe10 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -726,3 +726,38 @@ void sigaction_invoke(struct sigaction *action, + } + action->sa_sigaction(info->ssi_signo, &si, NULL); + } ++ ++#ifndef HOST_NAME_MAX ++# ifdef _POSIX_HOST_NAME_MAX ++# define HOST_NAME_MAX _POSIX_HOST_NAME_MAX ++# else ++# define HOST_NAME_MAX 255 ++# endif ++#endif ++ ++char *qemu_get_host_name(Error **errp) ++{ ++ long len = -1; ++ g_autofree char *hostname = NULL; ++ ++#ifdef _SC_HOST_NAME_MAX ++ len = sysconf(_SC_HOST_NAME_MAX); ++#endif /* _SC_HOST_NAME_MAX */ ++ ++ if (len < 0) { ++ len = HOST_NAME_MAX; ++ } ++ ++ /* Unfortunately, gethostname() below does not guarantee a ++ * NULL terminated string. Therefore, allocate one byte more ++ * to be sure. 
*/ ++ hostname = g_new0(char, len + 1); ++ ++ if (gethostname(hostname, len) < 0) { ++ error_setg_errno(errp, errno, ++ "cannot get hostname"); ++ return NULL; ++ } ++ ++ return g_steal_pointer(&hostname); ++} +diff --git a/util/oslib-win32.c b/util/oslib-win32.c +index e9b14ab1784..3b49d272972 100644 +--- a/util/oslib-win32.c ++++ b/util/oslib-win32.c +@@ -808,3 +808,16 @@ bool qemu_write_pidfile(const char *filename, Error **errp) + } + return true; + } ++ ++char *qemu_get_host_name(Error **errp) ++{ ++ wchar_t tmp[MAX_COMPUTERNAME_LENGTH + 1]; ++ DWORD size = G_N_ELEMENTS(tmp); ++ ++ if (GetComputerNameW(tmp, &size) == 0) { ++ error_setg_win32(errp, GetLastError(), "failed close handle"); ++ return NULL; ++ } ++ ++ return g_utf16_to_utf8(tmp, size, NULL, NULL, NULL); ++} +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch b/SOURCES/kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch new file mode 100644 index 0000000..8e58473 --- /dev/null +++ b/SOURCES/kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch @@ -0,0 +1,79 @@ +From f53c2c68db7780353a915072f8c953a74149b1f7 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:42 -0500 +Subject: [PATCH 3/7] vfio: Create shared routine for scanning info + capabilities +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-4-cohuck@redhat.com> +Patchwork-id: 100678 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/7] vfio: Create shared routine for scanning info capabilities +Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +From: Matthew Rosato + +Rather than duplicating the same loop in multiple locations, +create a static function to do the work. 
+ +Signed-off-by: Matthew Rosato +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +(cherry picked from commit 3ab7a0b40d4be5ade3b61d4afd1518193b199423) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/common.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 5ca11488d67..77d62d2dcdf 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -826,17 +826,12 @@ static void vfio_listener_release(VFIOContainer *container) + } + } + +-struct vfio_info_cap_header * +-vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) ++static struct vfio_info_cap_header * ++vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) + { + struct vfio_info_cap_header *hdr; +- void *ptr = info; +- +- if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { +- return NULL; +- } + +- for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { ++ for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { + if (hdr->id == id) { + return hdr; + } +@@ -845,6 +840,16 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) + return NULL; + } + ++struct vfio_info_cap_header * ++vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) ++{ ++ if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { ++ return NULL; ++ } ++ ++ return vfio_get_cap((void *)info, info->cap_offset, id); ++} ++ + static int vfio_setup_region_sparse_mmaps(VFIORegion *region, + struct vfio_region_info *info) + { +-- +2.27.0 + diff --git a/SOURCES/kvm-vfio-Find-DMA-available-capability.patch b/SOURCES/kvm-vfio-Find-DMA-available-capability.patch new file mode 100644 index 0000000..b81bcc4 --- /dev/null +++ b/SOURCES/kvm-vfio-Find-DMA-available-capability.patch @@ -0,0 +1,91 @@ +From e6147c5a23a75361b1374bfb4b96403d243b5c38 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 19 Jan 2021 12:50:43 -0500 
+Subject: [PATCH 4/7] vfio: Find DMA available capability + +RH-Author: Cornelia Huck +Message-id: <20210119125046.472811-5-cohuck@redhat.com> +Patchwork-id: 100677 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 4/7] vfio: Find DMA available capability +Bugzilla: 1905391 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +From: Matthew Rosato + +The underlying host may be limiting the number of outstanding DMA +requests for type 1 IOMMU. Add helper functions to check for the +DMA available capability and retrieve the current number of DMA +mappings allowed. + +Signed-off-by: Matthew Rosato +Reviewed-by: Cornelia Huck +[aw: vfio_get_info_dma_avail moved inside CONFIG_LINUX] +Signed-off-by: Alex Williamson +(cherry picked from commit 7486a62845b1e12011dd99973e4739f69d57cd38) +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/common.c | 31 +++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 2 ++ + 2 files changed, 33 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 77d62d2dcdf..23efdfadebd 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -850,6 +850,37 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) + return vfio_get_cap((void *)info, info->cap_offset, id); + } + ++static struct vfio_info_cap_header * ++vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) ++{ ++ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { ++ return NULL; ++ } ++ ++ return vfio_get_cap((void *)info, info->cap_offset, id); ++} ++ ++bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, ++ unsigned int *avail) ++{ ++ struct vfio_info_cap_header *hdr; ++ struct vfio_iommu_type1_info_dma_avail *cap; ++ ++ /* If the capability cannot be found, assume no DMA limiting */ ++ hdr = vfio_get_iommu_type1_info_cap(info, ++ VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); ++ if (hdr == NULL) { ++ return false; ++ } ++ ++ if (avail != NULL) { ++ cap = (void *) hdr; 
++ *avail = cap->avail; ++ } ++ ++ return true; ++} ++ + static int vfio_setup_region_sparse_mmaps(VFIORegion *region, + struct vfio_region_info *info) + { +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index fd564209ac7..aa6cbe4a998 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -191,6 +191,8 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, + bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); + struct vfio_info_cap_header * + vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id); ++bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, ++ unsigned int *avail); + #endif + extern const MemoryListener vfio_prereg_listener; + +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-add-vhost-user-fs-ccw-device.patch b/SOURCES/kvm-virtio-add-vhost-user-fs-ccw-device.patch new file mode 100644 index 0000000..d7d41af --- /dev/null +++ b/SOURCES/kvm-virtio-add-vhost-user-fs-ccw-device.patch @@ -0,0 +1,136 @@ +From fc5d5887462da813d91a3a0649214313d580d7af Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Tue, 27 Oct 2020 12:02:16 -0400 +Subject: [PATCH 03/18] virtio: add vhost-user-fs-ccw device + +RH-Author: Claudio Imbrenda +Message-id: <20201027120217.2997314-3-cimbrend@redhat.com> +Patchwork-id: 98720 +O-Subject: [RHEL8.4 qemu-kvm PATCH 2/3] virtio: add vhost-user-fs-ccw device +Bugzilla: 1857733 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +From: Halil Pasic + +upstream bd0bbb9aba2afbc2ea24b0475be04f795468b381 + +fixed for the backport: +* makefile logic instead of meson +* old style qdev initialization +* old style device class properties + +-- + +Wire up the CCW device for vhost-user-fs. 
+ +Reviewed-by: Cornelia Huck +Signed-off-by: Halil Pasic +Message-id: 20200901150019.29229-2-mhartmay@linux.ibm.com +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/Makefile.objs | 1 + + hw/s390x/vhost-user-fs-ccw.c | 76 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 77 insertions(+) + create mode 100644 hw/s390x/vhost-user-fs-ccw.c + +diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs +index a46a1c7894e..c4086ec3171 100644 +--- a/hw/s390x/Makefile.objs ++++ b/hw/s390x/Makefile.objs +@@ -20,6 +20,7 @@ obj-$(CONFIG_VIRTIO_NET) += virtio-ccw-net.o + obj-$(CONFIG_VIRTIO_BLK) += virtio-ccw-blk.o + obj-$(call land,$(CONFIG_VIRTIO_9P),$(CONFIG_VIRTFS)) += virtio-ccw-9p.o + obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock-ccw.o ++obj-$(CONFIG_VHOST_USER_FS) += vhost-user-fs-ccw.o + endif + obj-y += css-bridge.o + obj-y += ccw-device.o +diff --git a/hw/s390x/vhost-user-fs-ccw.c b/hw/s390x/vhost-user-fs-ccw.c +new file mode 100644 +index 00000000000..e7b165d5f61 +--- /dev/null ++++ b/hw/s390x/vhost-user-fs-ccw.c +@@ -0,0 +1,76 @@ ++/* ++ * virtio ccw vhost-user-fs implementation ++ * ++ * Copyright 2020 IBM Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++#include "qemu/osdep.h" ++#include "hw/qdev-properties.h" ++#include "qapi/error.h" ++#include "hw/virtio/vhost-user-fs.h" ++#include "virtio-ccw.h" ++ ++typedef struct VHostUserFSCcw { ++ VirtioCcwDevice parent_obj; ++ VHostUserFS vdev; ++} VHostUserFSCcw; ++ ++#define TYPE_VHOST_USER_FS_CCW "vhost-user-fs-ccw" ++#define VHOST_USER_FS_CCW(obj) \ ++ OBJECT_CHECK(VHostUserFSCcw, (obj), TYPE_VHOST_USER_FS_CCW) ++ ++ ++static Property vhost_user_fs_ccw_properties[] = { ++ DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, ++ VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), ++ DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, ++ VIRTIO_CCW_MAX_REV), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void vhost_user_fs_ccw_realize(VirtioCcwDevice *ccw_dev, Error **errp) ++{ ++ VHostUserFSCcw *dev = VHOST_USER_FS_CCW(ccw_dev); ++ DeviceState *vdev = DEVICE(&dev->vdev); ++ ++ qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); ++ object_property_set_bool(OBJECT(vdev), true, "realized", errp); ++} ++ ++static void vhost_user_fs_ccw_instance_init(Object *obj) ++{ ++ VHostUserFSCcw *dev = VHOST_USER_FS_CCW(obj); ++ VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); ++ ++ ccw_dev->force_revision_1 = true; ++ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), ++ TYPE_VHOST_USER_FS); ++} ++ ++static void vhost_user_fs_ccw_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); ++ ++ k->realize = vhost_user_fs_ccw_realize; ++ dc->props = vhost_user_fs_ccw_properties; ++ set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); ++} ++ ++static const TypeInfo vhost_user_fs_ccw = { ++ .name = TYPE_VHOST_USER_FS_CCW, ++ .parent = TYPE_VIRTIO_CCW_DEVICE, ++ .instance_size = sizeof(VHostUserFSCcw), ++ .instance_init = vhost_user_fs_ccw_instance_init, ++ .class_init = vhost_user_fs_ccw_class_init, ++}; ++ ++static void vhost_user_fs_ccw_register(void) ++{ ++ 
type_register_static(&vhost_user_fs_ccw); ++} ++ ++type_init(vhost_user_fs_ccw_register) +-- +2.27.0 + diff --git a/SOURCES/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch b/SOURCES/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch new file mode 100644 index 0000000..b0f678f --- /dev/null +++ b/SOURCES/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch @@ -0,0 +1,167 @@ +From c02ebc7e43f55b9423a065a7c53ba72bdb821c98 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 Feb 2021 23:14:54 -0500 +Subject: [PATCH 1/3] virtiofsd: extract lo_do_open() from lo_open() + +RH-Author: Jon Maloy +Message-id: <20210209231456.1555472-2-jmaloy@redhat.com> +Patchwork-id: 101024 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/3] virtiofsd: extract lo_do_open() from lo_open() +Bugzilla: 1919111 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Greg Kurz +RH-Acked-by: Dr. David Alan Gilbert + +From: Stefan Hajnoczi + +Both lo_open() and lo_create() have similar code to open a file. Extract +a common lo_do_open() function from lo_open() that will be used by +lo_create() in a later commit. + +Since lo_do_open() does not otherwise need fuse_req_t req, convert +lo_add_fd_mapping() to use struct lo_data *lo instead. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-2-stefanha@redhat.com> +Reviewed-by: Greg Kurz +Signed-off-by: Dr. David Alan Gilbert + +(cherry-picked from commit 8afaaee976965b7fb90ec225a51d60f35c5f173c) + +Conflict: update_open_flags() takes fewer arguments in this version + than in upstream. Instead of applying commit e12a0edafeb + ("virtiofsd: Add -o allow_direct_io|no_allow_direct_io + options") we keep the old signature, since this seems to + be an unrelated change. 
+ +Signed-off-by: Jon Maloy +Signed-off-by: Jon Maloy +--- + tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f41a6b07c8..518ba11c47 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -439,17 +439,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ +-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) + { + struct lo_map_elem *elem; + +- elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ elem = lo_map_alloc_elem(&lo->fd_map); + if (!elem) { + return -1; + } + + elem->fd = fd; +- return elem - lo_data(req)->fd_map.elems; ++ return elem - lo->fd_map.elems; + } + + /* Assumes lo->mutex is held */ +@@ -1712,6 +1712,38 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) + fi->flags &= ~O_DIRECT; + } + ++static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, ++ struct fuse_file_info *fi) ++{ ++ char buf[64]; ++ ssize_t fh; ++ int fd; ++ ++ update_open_flags(lo->writeback, fi); ++ ++ sprintf(buf, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return errno; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(lo, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ return ENOMEM; ++ } ++ ++ fi->fh = fh; ++ if (lo->cache == CACHE_NONE) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ return 0; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1752,7 +1784,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + ssize_t fh; + + pthread_mutex_lock(&lo->mutex); +- fh = 
lo_add_fd_mapping(req, fd); ++ fh = lo_add_fd_mapping(lo, fd); + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +@@ -1943,38 +1975,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- ssize_t fh; +- char buf[64]; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ int err; + + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- update_open_flags(lo->writeback, fi); +- +- sprintf(buf, "%i", lo_fd(req, ino)); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); +- } +- +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- fuse_reply_err(req, ENOMEM); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); + return; + } + +- fi->fh = fh; +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ err = lo_do_open(lo, inode, fi); ++ lo_inode_put(lo, &inode); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_open(req, fi); + } +- fuse_reply_open(req, fi); + } + + static void lo_release(fuse_req_t req, fuse_ino_t ino, +-- +2.18.2 + diff --git a/SOURCES/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch b/SOURCES/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch new file mode 100644 index 0000000..f21d793 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch @@ -0,0 +1,124 @@ +From f2c0b07088966c396ddcee54f4bed97cdb01192f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 Feb 2021 23:14:55 -0500 +Subject: [PATCH 2/3] virtiofsd: optionally return inode pointer from + lo_do_lookup() + +RH-Author: Jon Maloy +Message-id: 
<20210209231456.1555472-3-jmaloy@redhat.com> +Patchwork-id: 101022 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 2/3] virtiofsd: optionally return inode pointer from lo_do_lookup() +Bugzilla: 1919111 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Greg Kurz +RH-Acked-by: Dr. David Alan Gilbert + +From: Stefan Hajnoczi + +lo_do_lookup() finds an existing inode or allocates a new one. It +increments nlookup so that the inode stays alive until the client +releases it. + +Existing callers don't need the struct lo_inode so the function doesn't +return it. Extend the function to optionally return the inode. The next +commit will need it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Greg Kurz +Message-Id: <20210204150208.367837-3-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert + +(cherry-picked from commit 22d2ece71e533310da31f2857ebc4a00d91968b3) +Signed-off-by: Jon Maloy +Signed-off-by: Jon Maloy +--- + tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- + 1 file changed, 21 insertions(+), 8 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 518ba11c47..e5bd3d73e4 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -878,11 +878,13 @@ static void posix_locks_value_destroy(gpointer data) + } + + /* +- * Increments nlookup and caller must release refcount using +- * lo_inode_put(&parent). ++ * Increments nlookup on the inode on success. unref_inode_lolocked() must be ++ * called eventually to decrement nlookup again. If inodep is non-NULL, the ++ * inode pointer is stored and the caller must call lo_inode_put(). 
+ */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e, ++ struct lo_inode **inodep) + { + int newfd; + int res; +@@ -891,6 +893,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *inode = NULL; + struct lo_inode *dir = lo_inode(req, parent); + ++ if (inodep) { ++ *inodep = NULL; ++ } ++ + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse + * mount point in guest, but we don't have its inode info in the +@@ -953,7 +959,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +- lo_inode_put(lo, &inode); ++ ++ /* Transfer ownership of inode pointer to caller or drop it */ ++ if (inodep) { ++ *inodep = inode; ++ } else { ++ lo_inode_put(lo, &inode); ++ } ++ + lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, +@@ -988,7 +1001,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + return; + } + +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, name, &e, NULL); + if (err) { + fuse_reply_err(req, err); + } else { +@@ -1098,7 +1111,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- saverr = lo_do_lookup(req, parent, name, &e); ++ saverr = lo_do_lookup(req, parent, name, &e, NULL); + if (saverr) { + goto out; + } +@@ -1599,7 +1612,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + if (plus) { + if (!is_dot_or_dotdot(name)) { +- err = lo_do_lookup(req, ino, name, &e); ++ err = lo_do_lookup(req, ino, name, &e, NULL); + if (err) { + goto error; + } +@@ -1793,7 +1806,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, 
name, &e, NULL); + } + if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; +-- +2.18.2 + diff --git a/SOURCES/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch b/SOURCES/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch new file mode 100644 index 0000000..5956dce --- /dev/null +++ b/SOURCES/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch @@ -0,0 +1,314 @@ +From cc9a776fba8ec62c862db55753107f19459dafa8 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 Feb 2021 23:14:56 -0500 +Subject: [PATCH 3/3] virtiofsd: prevent opening of special files + (CVE-2020-35517) + +RH-Author: Jon Maloy +Message-id: <20210209231456.1555472-4-jmaloy@redhat.com> +Patchwork-id: 101023 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/3] virtiofsd: prevent opening of special files (CVE-2020-35517) +Bugzilla: 1919111 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Greg Kurz +RH-Acked-by: Dr. David Alan Gilbert + +From: Stefan Hajnoczi + +A well-behaved FUSE client does not attempt to open special files with +FUSE_OPEN because they are handled on the client side (e.g. device nodes +are handled by client-side device drivers). + +The check to prevent virtiofsd from opening special files is missing in +a few cases, most notably FUSE_OPEN. A malicious client can cause +virtiofsd to open a device node, potentially allowing the guest to +escape. This can be exploited by a modified guest device driver. It is +not exploitable from guest userspace since the guest kernel will handle +special files inside the guest instead of sending FUSE requests. + +This patch fixes this issue by introducing the lo_inode_open() function +to check the file type before opening it. This is a short-term solution +because it does not prevent a compromised virtiofsd process from opening +device nodes on the host. + +Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT +| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not +necessary here. 
If the file exists and the user did not specify O_EXCL, +open it via lo_do_open(). + +Reported-by: Alex Xu +Fixes: CVE-2020-35517 +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Vivek Goyal +Reviewed-by: Greg Kurz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-4-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit a3fdbbc7f271bff7d53d0501b29d910ece0b3789) +Signed-off-by: Jon Maloy +Signed-off-by: Jon Maloy +--- + tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 52 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e5bd3d73e4..cb0992f2db 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -535,6 +535,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) + return fd; + } + ++/* ++ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a ++ * regular file or a directory. ++ * ++ * Use this helper function instead of raw openat(2) to prevent security issues ++ * when a malicious client opens special files such as block device nodes. ++ * Symlink inodes are also rejected since symlinks must already have been ++ * traversed on the client side. ++ */ ++static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, ++ int open_flags) ++{ ++ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); ++ int fd; ++ ++ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { ++ return -EBADF; ++ } ++ ++ /* ++ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier ++ * that the inode is not a special file but if an external process races ++ * with us then symlinks are traversed here. It is not possible to escape ++ * the shared directory since it is mounted as "/" though. 
++ */ ++ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); ++ if (fd < 0) { ++ return -errno; ++ } ++ return fd; ++} ++ + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -788,9 +820,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + truncfd = fd; + } else { +- sprintf(procname, "%i", ifd); +- truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ truncfd = lo_inode_open(lo, inode, O_RDWR); + if (truncfd < 0) { ++ errno = -truncfd; + goto out_err; + } + } +@@ -894,7 +926,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *dir = lo_inode(req, parent); + + if (inodep) { +- *inodep = NULL; ++ *inodep = NULL; /* in case there is an error */ + } + + /* +@@ -1725,19 +1757,26 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) + fi->flags &= ~O_DIRECT; + } + ++/* ++ * Open a regular file, set up an fd mapping, and fill out the struct ++ * fuse_file_info for it. If existing_fd is not negative, use that fd instead ++ * opening a new one. Takes ownership of existing_fd. ++ * ++ * Returns 0 on success or a positive errno. 
++ */ + static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, +- struct fuse_file_info *fi) ++ int existing_fd, struct fuse_file_info *fi) + { +- char buf[64]; + ssize_t fh; +- int fd; ++ int fd = existing_fd; + + update_open_flags(lo->writeback, fi); + +- sprintf(buf, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return errno; ++ if (fd < 0) { ++ fd = lo_inode_open(lo, inode, fi->flags); ++ if (fd < 0) { ++ return -fd; ++ } + } + + pthread_mutex_lock(&lo->mutex); +@@ -1760,9 +1799,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +- int fd; ++ int fd = -1; + struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; ++ struct lo_inode *inode = NULL; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1788,36 +1828,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + + update_open_flags(lo->writeback, fi); + +- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, +- mode); ++ /* Try to create a new file but don't open existing files */ ++ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); + err = fd == -1 ? 
errno : 0; +- lo_restore_cred(&old); + +- if (!err) { +- ssize_t fh; ++ lo_restore_cred(&old); + +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(lo, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- err = ENOMEM; +- goto out; +- } ++ /* Ignore the error if file exists and O_EXCL was not given */ ++ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { ++ goto out; ++ } + +- fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e, NULL); ++ err = lo_do_lookup(req, parent, name, &e, &inode); ++ if (err) { ++ goto out; + } +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ ++ err = lo_do_open(lo, inode, fd, fi); ++ fd = -1; /* lo_do_open() takes ownership of fd */ ++ if (err) { ++ /* Undo lo_do_lookup() nlookup ref */ ++ unref_inode_lolocked(lo, inode, 1); + } + + out: ++ lo_inode_put(lo, &inode); + lo_inode_put(lo, &parent_inode); + + if (err) { ++ if (fd >= 0) { ++ close(fd); ++ } ++ + fuse_reply_err(req, err); + } else { + fuse_reply_create(req, &e, fi); +@@ -1831,7 +1873,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + pid_t pid, int *err) + { + struct lo_inode_plock *plock; +- char procname[64]; + int fd; + + plock = +@@ -1848,12 +1889,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + } + + /* Open another instance of file which can be used for ofd locks. */ +- sprintf(procname, "%i", inode->fd); +- + /* TODO: What if file is not writable? 
*/ +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd == -1) { +- *err = errno; ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ *err = -fd; + free(plock); + return NULL; + } +@@ -2000,7 +2039,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return; + } + +- err = lo_do_open(lo, inode, fi); ++ err = lo_do_open(lo, inode, -1, fi); + lo_inode_put(lo, &inode); + if (err) { + fuse_reply_err(req, err); +@@ -2056,39 +2095,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); + int res; + int fd; +- char *buf; + + fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, + (void *)fi); + +- if (!fi) { +- struct lo_data *lo = lo_data(req); +- +- res = asprintf(&buf, "%i", lo_fd(req, ino)); +- if (res == -1) { +- return (void)fuse_reply_err(req, errno); +- } ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } + +- fd = openat(lo->proc_self_fd, buf, O_RDWR); +- free(buf); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); ++ if (!fi) { ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ res = -fd; ++ goto out; + } + } else { + fd = lo_fi_fd(req, fi); + } + + if (datasync) { +- res = fdatasync(fd); ++ res = fdatasync(fd) == -1 ? errno : 0; + } else { +- res = fsync(fd); ++ res = fsync(fd) == -1 ? errno : 0; + } + if (!fi) { + close(fd); + } +- fuse_reply_err(req, res == -1 ? 
errno : 0); ++out: ++ lo_inode_put(lo, &inode); ++ fuse_reply_err(req, res); + } + + static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, +-- +2.18.2 + diff --git a/SOURCES/kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch b/SOURCES/kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch new file mode 100644 index 0000000..dbcf2a7 --- /dev/null +++ b/SOURCES/kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch @@ -0,0 +1,63 @@ +From ad50e0e2d310277f06a9c512fe6e31da183ead6e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 24 Feb 2021 11:30:34 -0500 +Subject: [PATCH 1/4] x86/cpu: Enable AVX512_VP2INTERSECT cpu feature + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210224113037.15599-2-dgilbert@redhat.com> +Patchwork-id: 101203 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 1/4] x86/cpu: Enable AVX512_VP2INTERSECT cpu feature +Bugzilla: 1790620 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Peter Xu + +From: Cathy Zhang + +AVX512_VP2INTERSECT compute vector pair intersection to a pair +of mask registers, which is introduced with intel Tiger Lake, +defining as CPUID.(EAX=7,ECX=0):EDX[bit 08]. + +Refer to the following release spec: +https://software.intel.com/sites/default/files/managed/c5/15/\ +architecture-instruction-set-extensions-programming-reference.pdf + +Signed-off-by: Cathy Zhang +Message-Id: <1586760758-13638-1-git-send-email-cathy.zhang@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 353f98c9ad52ff4b8cfe553c90be04f747a14c98) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index ff39fc9905..67dab94aa5 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1078,7 +1078,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, +- NULL, NULL, "md-clear", NULL, ++ "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL /* pconfig */, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index f3da25cb8a..8e2e52ed31 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -770,6 +770,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) + /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) ++/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ ++#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* Speculation Control */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ +-- +2.27.0 + diff --git a/SOURCES/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch b/SOURCES/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch new file mode 100644 index 0000000..9ef6d04 --- /dev/null +++ b/SOURCES/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch @@ -0,0 +1,91 @@ +From 655e723a5190206302f6cc4f2e794563b8e1c226 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 24 Feb 2021 11:30:36 -0500 +Subject: [PATCH 3/4] x86/cpu: Populate SVM CPUID feature bits + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20210224113037.15599-4-dgilbert@redhat.com> +Patchwork-id: 101200 +O-Subject: [RHEL-8.4.0 qemu-kvm PATCH 3/4] x86/cpu: Populate SVM CPUID feature bits +Bugzilla: 1790620 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Peter Xu + +From: Wei Huang + +Newer AMD CPUs will add CPUID_0x8000000A_EDX[28] bit, which indicates +that SVM instructions (VMRUN/VMSAVE/VMLOAD) will trigger #VMEXIT before +CPU checking their EAX against reserved memory regions. This change will +allow the hypervisor to avoid intercepting #GP and emulating SVM +instructions. KVM turns on this CPUID bit for nested VMs. In order to +support it, let us populate this bit, along with other SVM feature bits, +in FEAT_SVM. + +Signed-off-by: Wei Huang +Message-Id: <20210126202456.589932-1-wei.huang2@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5447089c2b3b084b51670af36fc86ee3979e04be) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 6 +++--- + target/i386/cpu.h | 24 ++++++++++++++---------- + 2 files changed, 17 insertions(+), 13 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f6a9ed84b3..7227c803c3 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1026,11 +1026,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "npt", "lbrv", "svm-lock", "nrip-save", + "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", + NULL, NULL, "pause-filter", NULL, +- "pfthreshold", NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ "pfthreshold", "avic", NULL, "v-vmsave-vmload", ++ "vgif", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, ++ "svme-addr-chk", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, + .tcg_features = TCG_SVM_FEATURES, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index f5a4efcec6..e1b67910c2 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -667,16 +667,20 @@ 
typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_EXT3_PERFCORE (1U << 23) + #define CPUID_EXT3_PERFNB (1U << 24) + +-#define CPUID_SVM_NPT (1U << 0) +-#define CPUID_SVM_LBRV (1U << 1) +-#define CPUID_SVM_SVMLOCK (1U << 2) +-#define CPUID_SVM_NRIPSAVE (1U << 3) +-#define CPUID_SVM_TSCSCALE (1U << 4) +-#define CPUID_SVM_VMCBCLEAN (1U << 5) +-#define CPUID_SVM_FLUSHASID (1U << 6) +-#define CPUID_SVM_DECODEASSIST (1U << 7) +-#define CPUID_SVM_PAUSEFILTER (1U << 10) +-#define CPUID_SVM_PFTHRESHOLD (1U << 12) ++#define CPUID_SVM_NPT (1U << 0) ++#define CPUID_SVM_LBRV (1U << 1) ++#define CPUID_SVM_SVMLOCK (1U << 2) ++#define CPUID_SVM_NRIPSAVE (1U << 3) ++#define CPUID_SVM_TSCSCALE (1U << 4) ++#define CPUID_SVM_VMCBCLEAN (1U << 5) ++#define CPUID_SVM_FLUSHASID (1U << 6) ++#define CPUID_SVM_DECODEASSIST (1U << 7) ++#define CPUID_SVM_PAUSEFILTER (1U << 10) ++#define CPUID_SVM_PFTHRESHOLD (1U << 12) ++#define CPUID_SVM_AVIC (1U << 13) ++#define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) ++#define CPUID_SVM_VGIF (1U << 16) ++#define CPUID_SVM_SVME_ADDR_CHK (1U << 28) + + /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ + #define CPUID_7_0_EBX_FSGSBASE (1U << 0) +-- +2.27.0 + diff --git a/SOURCES/udev-kvm-check.c b/SOURCES/udev-kvm-check.c index cb0ecba..928b9de 100644 --- a/SOURCES/udev-kvm-check.c +++ b/SOURCES/udev-kvm-check.c @@ -32,14 +32,6 @@ #define COUNT_MSG \ "%d %s now active" -#define SUBSCRIPTION_MSG \ - "%d %s now active; your Red Hat Enterprise Linux subscription" \ - " limit is %d guests. Please review your Red Hat Enterprise Linux" \ - " subscription agreement or contact your Red Hat" \ - " support representative for more information. 
You" \ - " may review the Red Hat Enterprise subscription" \ - " limits at http://www.redhat.com/rhel-virt-limits" - int get_threshold_from_file(FILE *fp) { static const char key[] = "THRESHOLD="; @@ -139,13 +131,6 @@ void emit_count_message(int count) closelog(); } -void emit_subscription_message(int count, int threshold) -{ - openlog(FACILITY, LOG_CONS, LOG_USER); - syslog(LOG_WARNING, SUBSCRIPTION_MSG, count, guest(count), threshold); - closelog(); -} - int main(int argc, char **argv) { int count, threshold; @@ -157,10 +142,8 @@ int main(int argc, char **argv) threshold = get_threshold(); if (!strcmp(argv[2], "create")) { - if (threshold == 0) { + if (threshold == 0 || count > threshold) { emit_count_message(count); - } else if (count > threshold) { - emit_subscription_message(count, threshold); } } else { if (count >= threshold) { diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 1661dbc..4a65e5f 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 35%{?dist} +Release: 48%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -964,6 +964,172 @@ Patch403: kvm-pc-bios-s390-ccw-Allow-booting-in-case-the-first-vir.patch Patch404: kvm-pc-bios-s390-ccw-main-Remove-superfluous-call-to-ena.patch # For bz#1846975 - Failed to boot up a s390x guest with virtio-blk-ccw if attaching a virtio-scsi-ccw bus in previous Patch405: kvm-aio-posix-completely-stop-polling-when-disabled.patch +# For bz#1884531 - qemu-ga aborts after guest-shutdown command +Patch406: kvm-qga-fix-assert-regression-on-guest-shutdown.patch +# For bz#1857733 - [IBM 8.4 FEAT] KVM: Add support for virtio-fs on s390x - qemu part +Patch407: kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch +# For bz#1857733 - [IBM 8.4 FEAT] KVM: Add support for virtio-fs on s390x - qemu part +Patch408: kvm-virtio-add-vhost-user-fs-ccw-device.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch410: kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch411: kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch412: kvm-s390-sclp-rework-sclp-boundary-checks.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch413: kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch414: kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch415: kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch416: kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length 
SCCBs - qemu part +Patch417: kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch418: kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch419: kvm-s390-guest-support-for-diagnose-0x318.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch420: kvm-s390x-pv-Remove-sclp-boundary-checks.patch +# For bz#1798506 - [IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part +Patch421: kvm-s390x-pv-Fix-diag318-PV-fencing.patch +# For bz#1659412 - [IBM 8.4 FEAT] KVM enablement for enhanced hardware diagnose data of guest kernel on s390x - qemu part +Patch422: kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch +# For bz#1898700 - qemu-kvm for RHEL-8.4 doesn't build due to a possible incompatibility with systemtap-sdt-devel-4.4-1 +Patch423: kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch +# For bz#1860994 - CVE-2020-16092 virt:rhel/qemu-kvm: QEMU: reachable assertion failure in net_tx_pkt_add_raw_fragment() in hw/net/net_tx_pkt.c [rhel-8] +Patch424: kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch +# For bz#1880546 - qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available +Patch425: kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch +# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) +Patch426: kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch +# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) +Patch427: kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch +# For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) +Patch428: kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch +# 
For bz#1903135 - RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm) +Patch429: kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch430: kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch431: kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch +# For bz#1902237 - CVE-2020-29129 CVE-2020-29130 virt:rhel/qemu-kvm: QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets [rhel-8] +Patch432: kvm-slirp-check-pkt_len-before-reading-protocol-header.patch +# For bz#1905386 - RHEL8.3 - s390x/s390-virtio-ccw: Reset PCI devices during subsystem reset (qemu-kvm) +Patch433: kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch434: kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch435: kvm-error-Fix-examples-in-error.h-s-big-comment.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch436: kvm-error-Improve-error.h-s-big-comment.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch437: kvm-error-Document-Error-API-usage-rules.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch438: kvm-error-New-macro-ERRP_GUARD.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch439: kvm-qga-add-command-guest-get-disks.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch440: kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch441: kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch +# For bz#1859494 - Report logical_name for 
disks without mounted file-system +Patch442: kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch +# For bz#1859494 - Report logical_name for disks without mounted file-system +Patch443: kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch +# For bz#1910267 - There is no soft link '/etc/qemu-kvm/fsfreeze-hook' +Patch444: kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch +# For bz#1910326 - Incorrect hostname returned by qga command 'guest-get-host-name' +Patch445: kvm-qga-rename-Error-parameter-to-more-common-errp.patch +# For bz#1910326 - Incorrect hostname returned by qga command 'guest-get-host-name' +Patch446: kvm-util-Introduce-qemu_get_host_name.patch +# For bz#1910326 - Incorrect hostname returned by qga command 'guest-get-host-name' +Patch447: kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch449: kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch450: kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch451: kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. 
+Patch452: kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch453: kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch454: kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch455: kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch456: kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch457: kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch458: kvm-memory-Add-IOMMUTLBEvent.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. 
+Patch459: kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch460: kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch +# For bz#1843852 - qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed. +Patch461: kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch +# For bz#1904393 - CVE-2020-27821 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-8] +Patch462: kvm-memory-clamp-cached-translation-in-case-it-points-to.patch +# For bz#1898628 - CVE-2020-25723 virt:rhel/qemu-kvm: QEMU: assertion failure through usb_packet_unmap() in hw/usb/hcd-ehci.c [rhel-8] +Patch463: kvm-hw-ehci-check-return-value-of-usb_packet_map.patch +# For bz#1903070 - CVE-2020-25707 CVE-2020-28916 virt:rhel/qemu-kvm: various flaws [rhel-8] +Patch464: kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) +Patch465: kvm-linux-headers-add-vfio-DMA-available-capability.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) +Patch466: kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) +Patch467: kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) +Patch468: kvm-vfio-Find-DMA-available-capability.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) +Patch469: kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting 
(qemu-kvm) +Patch470: kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch +# For bz#1905391 - RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm) +Patch471: kvm-s390x-fix-build-for-without-default-devices.patch +# For bz#1918054 - CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-8.4.0] +Patch472: kvm-Drop-bogus-IPv6-messages.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch473: kvm-spapr-Improve-handling-of-fdt-buffer-size.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch474: kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch475: kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch476: kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch477: kvm-spapr-Allow-memory-unplug-to-always-succeed.patch +# For bz#1901837 - Failed to hotunplug pc-dimm device +Patch478: kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch +# For bz#1834281 - qemu-img convert abort when converting image with unaligned size +Patch479: kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch +# For bz#1834281 - qemu-img convert abort when converting image with unaligned size +Patch480: kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch +# For bz#1912974 - CVE-2020-11947 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in iscsi_aio_ioctl_cb() in block/iscsi.c may lead to information disclosure [rhel-8] +Patch481: kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch +# For bz#1919111 - CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-8.4.0] +Patch482: kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch +# For bz#1919111 - CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged 
host device access from guest [rhel-8.4.0] +Patch483: kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch +# For bz#1919111 - CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-8.4.0] +Patch484: kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch +# For bz#1912891 - [ppc64le] --disk cdimage.iso,bus=usb fails to boot +Patch486: kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch +# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train +Patch487: kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch +# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train +Patch488: kvm-target-i386-add-fast-short-REP-MOV-support.patch +# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train +Patch489: kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch +# For bz#1790620 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train +Patch490: kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch +# For bz#1917451 - CVE-2020-29443 virt:rhel/qemu-kvm: QEMU: ide: atapi: OOB access while processing read commands [rhel-8.4.0] +Patch491: kvm-ide-atapi-check-logical-block-address-and-read-size-.patch BuildRequires: wget BuildRequires: rpm-build @@ -1324,6 +1490,9 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-libpmem \ %endif --enable-vhost-user \ +%ifarch s390x + --enable-vhost-user-fs \ +%endif %ifarch %{ix86} x86_64 --enable-avx2 \ %else @@ -1335,7 +1504,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --audio-drv-list= \ --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ --with-coroutine=ucontext \ - --tls-priority=NORMAL \ + --tls-priority=@QEMU,SYSTEM \ --disable-bluez \ --disable-brlapi \ --enable-cap-ng \ @@ -1526,6 +1695,11 @@ install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrules install -D --preserve-timestamps \ 
scripts/qemu-guest-agent/fsfreeze-hook \ $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook +# Workaround for the missing /etc/qemu-kvm/fsfreeze-hook +# Please, do not carry this over to RHEL-9 +mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/ +ln -s %{_sysconfdir}/qemu-ga/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/fsfreeze-hook # - the directory for user scripts: mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d @@ -1746,6 +1920,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %systemd_postun_with_restart ksm.service %systemd_postun_with_restart ksmtuned.service +%post -n qemu-guest-agent +%systemd_post qemu-guest-agent.service +%preun -n qemu-guest-agent +%systemd_preun qemu-guest-agent.service +%postun -n qemu-guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + %files # Deliberately empty @@ -1871,6 +2052,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_udevrulesdir}/99-qemu-guest-agent.rules %config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga %{_sysconfdir}/qemu-ga +%{_sysconfdir}/qemu-kvm/fsfreeze-hook %{_datadir}/%{name}/qemu-ga %dir %{_localstatedir}/log/qemu-ga @@ -1896,6 +2078,179 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 4.2.0-48.el8 +- kvm-ide-atapi-check-logical-block-address-and-read-size-.patch [bz#1917451] +- Resolves: bz#1917451 + (CVE-2020-29443 virt:rhel/qemu-kvm: QEMU: ide: atapi: OOB access while processing read commands [rhel-8.4.0]) + +* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 4.2.0-47.el8 +- kvm-x86-cpu-Enable-AVX512_VP2INTERSECT-cpu-feature.patch [bz#1790620] +- kvm-target-i386-add-fast-short-REP-MOV-support.patch [bz#1790620] +- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1790620] +- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1790620] +- Resolves: bz#1790620 + ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Slow Train) + +* Wed Mar 03 2021 
Danilo Cesar Lemes de Paula - 4.2.0-46.el8 +- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902960] +- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1912891] +- Resolves: bz#1902960 + (QEMU doesn't honour system crypto policies) +- Resolves: bz#1912891 + ([ppc64le] --disk cdimage.iso,bus=usb fails to boot) + +* Wed Feb 10 2021 Jon Maloy - 4.2.0-45.el8 +- kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1919111] +- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1919111] +- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1919111] +- Resolves: bz#1919111 + (CVE-2020-35517 virt:rhel/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-8.4.0]) + +* Tue Feb 02 2021 Jon Maloy - 4.2.0-44.el8 +- kvm-spapr-Improve-handling-of-fdt-buffer-size.patch [bz#1901837] +- kvm-spapr-Fold-h_cas_compose_response-into-h_client_arch.patch [bz#1901837] +- kvm-spapr-Don-t-use-spapr_drc_needed-in-CAS-code.patch [bz#1901837] +- kvm-spapr-Fix-handling-of-unplugged-devices-during-CAS-a.patch [bz#1901837] +- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1901837] +- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1901837] +- kvm-block-Require-aligned-image-size-to-avoid-assertion-.patch [bz#1834281] +- kvm-file-posix-Allow-byte-aligned-O_DIRECT-with-NFS.patch [bz#1834281] +- kvm-block-iscsi-fix-heap-buffer-overflow-in-iscsi_aio_io.patch [bz#1912974] +- Resolves: bz#1834281 + (qemu-img convert abort when converting image with unaligned size) +- Resolves: bz#1901837 + (Failed to hotunplug pc-dimm device) +- Resolves: bz#1912974 + (CVE-2020-11947 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in iscsi_aio_ioctl_cb() in block/iscsi.c may lead to information disclosure [rhel-8]) + +* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 4.2.0-43.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1918054] +- Resolves: bz#1918054 + (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: 
networking out-of-bounds read information disclosure vulnerability [rhel-8.4.0]) + +* Thu Jan 21 2021 Danilo Cesar Lemes de Paula - 4.2.0-42.el8 +- kvm-linux-headers-add-vfio-DMA-available-capability.patch [bz#1905391] +- kvm-s390x-pci-Move-header-files-to-include-hw-s390x.patch [bz#1905391] +- kvm-vfio-Create-shared-routine-for-scanning-info-capabil.patch [bz#1905391] +- kvm-vfio-Find-DMA-available-capability.patch [bz#1905391] +- kvm-s390x-pci-Add-routine-to-get-the-vfio-dma-available-.patch [bz#1905391] +- kvm-s390x-pci-Honor-DMA-limits-set-by-vfio.patch [bz#1905391] +- kvm-s390x-fix-build-for-without-default-devices.patch [bz#1905391] +- Resolves: bz#1905391 + (RHEL8.4 - s390x/pci: Honor vfio DMA limiting (qemu-kvm)) + +* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 4.2.0-41.el8 +- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1909244] +- kvm-hw-arm-smmu-common-Factorize-some-code-in-smmu_ptw_6.patch [bz#1843852] +- kvm-hw-arm-smmu-common-Add-IOTLB-helpers.patch [bz#1843852] +- kvm-hw-arm-smmu-Introduce-smmu_get_iotlb_key.patch [bz#1843852] +- kvm-hw-arm-smmu-Introduce-SMMUTLBEntry-for-PTW-and-IOTLB.patch [bz#1843852] +- kvm-hw-arm-smmu-common-Manage-IOTLB-block-entries.patch [bz#1843852] +- kvm-hw-arm-smmuv3-Introduce-smmuv3_s1_range_inval-helper.patch [bz#1843852] +- kvm-hw-arm-smmuv3-Get-prepared-for-range-invalidation.patch [bz#1843852] +- kvm-hw-arm-smmuv3-Fix-potential-integer-overflow-CID-143.patch [bz#1843852] +- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1843852] +- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1843852] +- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1843852] +- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1843852] +- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1843852] +- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904393] +- kvm-hw-ehci-check-return-value-of-usb_packet_map.patch [bz#1898628] +- 
kvm-hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch [bz#1903070] +- Resolves: bz#1909244 + (Remove KVM guest count and limit info message) +- Resolves: bz#1843852 + (qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed.) +- Resolves: bz#1904393 + (CVE-2020-27821 virt:rhel/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-8]) +- Resolves: bz#1898628 + (CVE-2020-25723 virt:rhel/qemu-kvm: QEMU: assertion failure through usb_packet_unmap() in hw/usb/hcd-ehci.c [rhel-8]) +- Resolves: bz#1903070 + (CVE-2020-25707 CVE-2020-28916 virt:rhel/qemu-kvm: various flaws [rhel-8]) + +* Mon Jan 04 2021 Danilo Cesar Lemes de Paula - 4.2.0-40.el8 +- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1910267] +- kvm-qga-rename-Error-parameter-to-more-common-errp.patch [bz#1910326] +- kvm-util-Introduce-qemu_get_host_name.patch [bz#1910326] +- kvm-qga-Use-qemu_get_host_name-instead-of-g_get_host_nam.patch [bz#1910326] +- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1910220] +- Resolves: bz#1910267 + (There is no soft link '/etc/qemu-kvm/fsfreeze-hook') +- Resolves: bz#1910326 + (Incorrect hostname returned by qga command 'guest-get-host-name') +- Resolves: bz#1910220 + (qemu-ga service still active and can work after qemu-guest-agent been removed) + +* Wed Dec 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-39.el8 +- kvm-ppc-spapr-Add-hotremovable-flag-on-DIMM-LMBs-on-drme.patch [bz#1901837] +- kvm-ppc-spapr-re-assert-IRQs-during-event-scan-if-there-.patch [bz#1901837] +- kvm-slirp-check-pkt_len-before-reading-protocol-header.patch [bz#1902237] +- kvm-s390x-s390-virtio-ccw-Reset-PCI-devices-during-subsy.patch [bz#1905386] +- kvm-qapi-enable-use-of-g_autoptr-with-QAPI-types.patch [bz#1859494] +- kvm-error-Fix-examples-in-error.h-s-big-comment.patch [bz#1859494] +- 
kvm-error-Improve-error.h-s-big-comment.patch [bz#1859494] +- kvm-error-Document-Error-API-usage-rules.patch [bz#1859494] +- kvm-error-New-macro-ERRP_GUARD.patch [bz#1859494] +- kvm-qga-add-command-guest-get-disks.patch [bz#1859494] +- kvm-qga-add-implementation-of-guest-get-disks-for-Linux.patch [bz#1859494] +- kvm-qga-add-implementation-of-guest-get-disks-for-Window.patch [bz#1859494] +- kvm-qga-fix-missing-closedir-in-qmp_guest_get_disks.patch [bz#1859494] +- kvm-qga-update-schema-for-guest-get-disks-dependents-fie.patch [bz#1859494] +- Resolves: bz#1859494 + (Report logical_name for disks without mounted file-system) +- Resolves: bz#1901837 + (Failed to hotunplug pc-dimm device) +- Resolves: bz#1902237 + (CVE-2020-29129 CVE-2020-29130 virt:rhel/qemu-kvm: QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets [rhel-8]) +- Resolves: bz#1905386 + (RHEL8.3 - s390x/s390-virtio-ccw: Reset PCI devices during subsystem reset (qemu-kvm)) + +* Fri Dec 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-38.el8 +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1880546] +- kvm-pc-bios-s390x-Rename-PSW_MASK_ZMODE-to-PSW_MASK_64.patch [bz#1903135] +- kvm-pc-bios-s390x-Use-PSW-masks-where-possible-and-intro.patch [bz#1903135] +- kvm-pc-bios-s390x-Ensure-Read-IPL-memory-is-clean.patch [bz#1903135] +- kvm-pc-bios-s390x-Clear-out-leftover-S390EP-string.patch [bz#1903135] +- Resolves: bz#1880546 + (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) +- Resolves: bz#1903135 + (RHEL8.3 - KVM Distro install to vfio_ccw backed DASD gets error at the reboot step (qemu-kvm)) + +* Mon Nov 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-37.el8 +- kvm-hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch [bz#1860994] +- Resolves: bz#1860994 + (CVE-2020-16092 virt:rhel/qemu-kvm: QEMU: reachable assertion failure in net_tx_pkt_add_raw_fragment() in hw/net/net_tx_pkt.c [rhel-8]) + +* Fri Nov 20 2020 Danilo Cesar Lemes de Paula - 4.2.0-36.el8 +- 
kvm-qga-fix-assert-regression-on-guest-shutdown.patch [bz#1884531] +- kvm-libvhost-user-handle-endianness-as-mandated-by-the-s.patch [bz#1857733] +- kvm-virtio-add-vhost-user-fs-ccw-device.patch [bz#1857733] +- kvm-Ensure-vhost-user-fs-is-enabled-on-s390x.patch [bz#1857733] +- kvm-s390x-sclp.c-remove-unneeded-label-in-sclp_service_c.patch [bz#1798506] +- kvm-s390-sclp-get-machine-once-during-read-scp-cpu-info.patch [bz#1798506] +- kvm-s390-sclp-rework-sclp-boundary-checks.patch [bz#1798506] +- kvm-s390-sclp-read-sccb-from-mem-based-on-provided-lengt.patch [bz#1798506] +- kvm-s390-sclp-check-sccb-len-before-filling-in-data.patch [bz#1798506] +- kvm-s390-sclp-use-cpu-offset-to-locate-cpu-entries.patch [bz#1798506] +- kvm-s390-sclp-add-extended-length-sccb-support-for-kvm-g.patch [bz#1798506] +- kvm-linux-headers-Partial-update-against-Linux-5.9-rc4.patch [bz#1798506] +- kvm-misc-Replace-zero-length-arrays-with-flexible-array-.patch [bz#1798506] +- kvm-s390-guest-support-for-diagnose-0x318.patch [bz#1798506] +- kvm-s390x-pv-Remove-sclp-boundary-checks.patch [bz#1798506] +- kvm-s390x-pv-Fix-diag318-PV-fencing.patch [bz#1798506] +- kvm-s390-kvm-fix-diag318-propagation-and-reset-functiona.patch [bz#1659412] +- kvm-trace-use-STAP_SDT_V2-to-work-around-symbol-visibili.patch [bz#1898700] +- Resolves: bz#1659412 + ([IBM 8.4 FEAT] KVM enablement for enhanced hardware diagnose data of guest kernel on s390x - qemu part) +- Resolves: bz#1798506 + ([IBM 8.4 FEAT] KVM: Support extended-length SCCBs - qemu part) +- Resolves: bz#1857733 + ([IBM 8.4 FEAT] KVM: Add support for virtio-fs on s390x - qemu part) +- Resolves: bz#1884531 + (qemu-ga aborts after guest-shutdown command) +- Resolves: bz#1898700 + (qemu-kvm for RHEL-8.4 doesn't build due to a possible incompatibility with systemtap-sdt-devel-4.4-1) + * Mon Oct 19 2020 Danilo Cesar Lemes de Paula - 4.2.0-35.el8 - kvm-qga-commands-posix-Rework-build_guest_fsinfo_for_rea.patch [bz#1755075] - 
kvm-qga-commands-posix-Move-the-udev-code-from-the-pci-t.patch [bz#1755075]